;; Ave Cruce Salus Mea                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; program  : Win3ds44 - 3d files viewer and manipulator                     ;;
;;                                                                           ;;
;; compiler : flat assembler                                                 ;;
;; author:    Maciej Guba aka macgub                                         ;;
;; web      : http://macgub.co.pl                                            ;;
;; email    : macgub@macgub.co.pl                                            ;;
;;                                                                           ;;
;; This application don't uses any particular 3d graphic library.            ;;
;; Written from scratch in pure assembler. Thanks to all, who was            ;;
;; helping me to do it.                                                      ;;
;;                                                                           ;;
;; Especially:                                                               ;;
;; Jan Pawel II, Tomasz Grysztar, Madis Kalme, Mikolaj Feliks, Lostcauz,     ;;
;; Brian Paul, Reverend, Pablo Reda, MHajduk, Ica, James Foley,              ;;
;; Andries van Dam, Steven Feiner, John Hughes, Richard Phillips,            ;;
;; tthsqe, J. Burkardt, Morgan McGuire, Pierre Bezier, Przemyslaw Kiciak     ;;
;; and many others...                                                        ;;
;;                                                                           ;;
;; Program needs sse4 extension, in full mode. Very basic work when only     ;;
;; sse2 is present. I dont have enough power to provide better support for   ;;
;; oldtimer machines. You may uncomment lines 95 and 96 and test sse2        ;;
;; version. Sorry for rather ugly coding style. This open project - still in ;;
;; development, source may contain some dirt and rough work marks.           ;;
;;                                                                           ;;
;; See release_notes.txt file to read some version depend notes info.        ;;
;; See manual.pdf document for more detailed tips to work with app.          ;;
;;                                                                           ;;
;; General note about implemented operations. - Some of it may run terribly  ;;
;; slow aspecially on 'large' objects. Maybye some tests on low detail       ;;
;; objects may prevent your speed disapointing. Note that even operations on ;;
;; small objects may be slow, caused by way of sloving problems. Its 32 bit  ;;
;; app, so object above ~10 000 000 vertices/faces may be to big for 4GB RAM ;;
;; limit - some operations alloc temporarily many memory, because using mem  ;;
;; greedy pivot lists to speed up calculations. Many operation cannot be     ;;
;; launched  parallel in the same time (ex. editing).                        ;;
;;                                                                           ;;
;; First hint to user is - hit space to switch between rendered              ;;
;; models..                                                                  ;;
;;                                                                           ;;
;; Second hint to user is - be patient after pressing button -               ;;
;;  - give chance to finish operation you select..                           ;;
;;                                                                           ;;
;; After run program shows dialog to open file.                              ;;
;; Choose file in 3ds, asc, ply, md2 or lwo format or press ESC key to       ;;
;; generate object.                                                          ;;
;;                                                                           ;;
;;                                                                           ;;
;;                                 "No work is ever complete,                ;;
;;                                  and this one is no exception."           ;;
;;                                                   Pierre Bezier           ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; win3ds.asm procedures list:
;;
;;   alloc_mem_for_tp
;;   bezier
;;   buttons
;;   calc_shd_verts
;;   do_active_object
;;   do_lights_shadow_stencil_th
;;   do_shadow
;;   do_stencil
;;   draw_menu
;;   ffd
;;   free_mem_for_tp
;;   g_draw
;;   init_point_normals
;;   init_triangles_normals
;;   pack_triangles_normals
;;   long_pipe_init
;;   malloc_proc
;;   mem_error
;;   mfree_proc
;;   normalize_all_light_vectors
;;   peel
;;   pipe_a
;;   prepare_object
;;   prompt_proc
;;   random_pointlight_position
;;   re_alloc_stenc_shadows
;;   rend_com
;;   rotate_normals
;;   rotate_points
;;   translate_points
;;   update_flags_after_keypress
;;   zero_flags
                format PE GUI 4.0
                ; format PE64 GUI 4.0
                entry   start ; initcuda
                sse4 = 0
                sse3 = 1
                sse2 = 2
                Ext = sse4               ; Set to sse3 to achive see3 tensor product
              ;   include 'sse2.inc'     ; uncomment to achive sse2 version
              ;   include 'sse3.inc'     ; uncomment tu run sse3 machines
                include './include/win32a.inc'
              ;   include 'cuda.inc'
                include 'macr.inc'
              ;  include 'jpeg_structs.inc'
                BXRES           = 400    ; choose window size of next edit area
                BYRES           = 400
                MXRES           = 400    ; menu window size
                MYRES           = 450
                TEX_X           = 512    ; texture size
                TEX_Y           = 512
                TEXTURE_SIZE    = (TEX_X * TEX_Y)-1
                TEX_SHIFT       = 9
                TEX_X_S         = 32     ; smaller texture
                TEX_Y_S         = 32
                TEX_SHIFT_S     = 5
                TEXTURE_SIZE_S  = (TEX_X_S * TEX_Y_S)-1
                NEXT_CURV_NUMB  = 80   ; must be divisable by 4
                ; next cuve allow bend operation on /next part/
                FONT_FIX        = 170  ; decreased  size of menu
                ROUND           equ 8
                PROMPTX         = 500
                PROMPTY         = 30
                INFOX           = MXRES
                INFOY           = 140
                IN_CNST         = INFOY  + BYRES ;  + MYRES
                ; some intial macros - just blocks of instructions..
                macro     stretch_menu
                {
                ; empty macro
                }
                macro do_size_vars
                {
                ;   eax = x
                ;   edx = y
                mov       esi,xxaddi
                mov       edi,xres_var
                and       eax,0xfffffffc
                mov       [whole_xres],ax
                sub       eax,MXRES     ; +MXPRES+10   ; place for menu
                and       eax,-8        ; -4 =  motion blur need that, some kind of align
                mov       ebx,eax       ; -8 =  to easier jpeg save
                mov       [edi],ax      ; xres_var
                shr       eax,1
                mov       [esi],eax     ; xxaddi
                and       edx,0xfffffffc
                mov       [whole_yres],dx
                sub       edx,70
                and       edx,-8
                mov       [edi+2],dx    ; yres_var
                mov       ecx,edx
                shr       ecx,1
                mov       [esi+4],ecx   ; yyaddi
                movzx     eax,word[edi] ; xres_var
                mov       ebx,edx       ; yres_var
                sub       eax,6
                sub       ebx,6
                mov       edi,max_work_x
                mov       [edi],ax
                mov       [edi+2],bx
                dec       eax
                dec       ebx
                cvtsi2ss  xmm0,eax
                cvtsi2ss  xmm1,ebx
                movss     [tri_area_x2],xmm0  ; init teslate area
                movss     [tri_area_y2],xmm1
                }
                macro alloc_screen_mem
                {
                pop_abi_regs
                movzx     eax,[xres_var]
                movzx     edx,[yres_var]
                imul      edx,eax
                shr       edx,2 ; 2 bit of each position (pixel)
                add       edx,200
                push      edx
                malloc    edx
                mov       [edit_tri_area_ptr],eax  ; teslate area
                pop       edx
                add       edx,65536
                shl       edx,2
                push      edx
                malloc    edx
                mov       [slices_counter_ptr],eax
                pop       ebx
                shl       ebx,2
                push      ebx
                malloc    ebx
                mov       [screen_ptr],eax
                pop       edx
                push      edx
                malloc    edx
                mov       [new_tex_ptr],eax
                pop       edx
                push      edx
                malloc    edx
                mov       [slices_ptrs_buff_ptr],eax
                pop       edx
                shl       edx,2   ; in Z buffer , also 4 slices buffer
                add       edx,65535
                push      edx
                malloc    edx
                mov       [Zbuffer_ptr],eax
                pop       edx
                shr       ebx,2
                push      edx
                malloc    edx
                mov       [edit_buf_ptr],eax
                pop       edx
                shr       edx,2   ; maybye on bit ??
                malloc    edx
                mov       [shd_buffer_ptr],eax
                push_abi_regs
                }
                macro     prompt arg1
                {
                push      arg1
                call      prompt_proc
                }
                macro     cls
                {
                ; empty macro
                ; call      cls_proc
                }
section '.text' code readable executable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; start and message loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; initcuda:

    ; 32 bit version of cuda init snippet posted by Roman
    ; on board.flatassembler.net

    ;         lea  ebx,[_cuInit]
    ;      invoke  cuInit,0
    ;        test  eax,eax
    ;         jnz  cuError

    ;         lea  ebx,[_cuDeviceGet]
    ;      invoke  cuDeviceGet,CudaDevice,0   ; get the first device
    ;        test  eax,eax
    ;         jnz  cuError

    ;         lea  ebx,[_cuCtxCreate]
    ;      invoke  cuCtxCreate,CudaContext,CU_CTX_SCHED_SPIN+CU_CTX_MAP_HOST,[CudaDevice]
    ;      ; this context associates the device with this cpu thread
    ;        test  eax,eax
    ;         jnz  cuError

    ;         lea  ebx,[_cuMemAlloc]
    ;      invoke  cuMemAlloc,CudaNumberArray,256*4
    ;        test  eax,eax
    ;         jnz  cuError
    ;         mov  [isCUDA],1
    ;         jmp  start
    ; cuError:
    ; load ptx source
    ;         lea  rbx,[_cuModuleLoadData]
    ;      invoke  cuModuleLoadData,CudaModule,PTXSourceData
    ;        test  eax,eax
    ;         jnz  Error

    ;         lea  rbx,[_cuModuleGetFunction]
    ;      invoke  cuModuleGetFunction,CudaFunction,[CudaModule],PTXFunction
    ;        test  eax,eax
    ;         jnz  Error

              start:
                invoke    GetModuleHandle,0
                mov       [wc.hInstance],eax
                invoke    LoadIcon,0,IDI_APPLICATION
                mov       [wc.hIcon],eax
                invoke    LoadCursor,0,IDC_ARROW
                mov       [wc.hCursor],eax
                push_abi_regs

                mov       eax,1
                cpuid
                ; test bit no 28 of ecx if set - avx on board
                bt        ecx,28
                jnc       @f
                mov       [isAVX],1  ; .avx_present
              @@:
                bt        ecx,20
                jnc       @f
                mov       [isSSE42],1
                ; mov       [draw_flag],15
                jmp       .ns4
              @@:
                ; if      Ext <> SSE3
                ; mov     [maxffd],1     ; switch off sse4 options
                ; end      if
                mov       [max_rpbump],1
                mov       [max_draw],19  ; switch off sse4 render models
              .ns4:
                ; mov       edi,ThreadMaskArray
                ; xor       eax,eax
                ; mov       ecx,64
                ; cld
                ; rep       stosd
                pop_abi_regs
                invoke    GetCurrentProcess
                invoke    GetProcessAffinityMask,eax,ProcessAffinityMask,SystemAffinityMask
                push_abi_regs
                mov       ecx,ThreadMaskArray
                ; core count snippet - 32bit version tthsqe's
                mov       ebx,[ProcessAffinityMask]
                xor       edx,edx
              .loop:
                mov       eax,ebx
                dec       ebx
                and       ebx,eax
                xor       eax,ebx
                inc       edx
                mov       [ecx],eax
                add       ecx,4
                test      ebx,ebx
                jnz       .loop
                mov       [CoresCount],edx

                pop_abi_regs
                invoke    GetSystemMetrics,SM_CXSCREEN
                push      eax
                invoke    GetSystemMetrics,SM_CYSCREEN
                push_abi_regs
                mov       edx,eax
                pop       eax
                mov       [initXsize],eax
                mov       [initYsize],edx
                do_size_vars
                ; fix font and whole menu
                mov       esi,menu_data
                xor       eax,eax
                mov       ebx,20
                cmp       [yres_var],900
                cmovb     eax,ebx
                cmp       ax,[esi+7]
                jne       ..skip_fix
              @@:
                movzx     eax,word[esi+7]
                cdq
                div       ebx
                imul      eax,13
                sub       eax,11
                mov       [esi+3],ax
                add       eax,12
                mov       [esi+7],ax
                add       esi,menu_data.op   ; size of one menu position
                cmp       [esi],byte -1
                jne       @b
                mov       eax,MXRES * 4 * FONT_FIX
                sub       [bbuffer_ptr],eax
                sub       eax, 40 * MXRES
                sub       [inf_screen_ptr],eax
                sub       [next_var], FONT_FIX
              ..skip_fix:
                alloc_screen_mem
                mov       eax,FUR_SIZE*(FUR_SHELLS+1) + 6553
                malloc    eax
                ; fur routines need reedit
                mov       [furs_ptr],eax
                mov       eax,65536*6  ; mem for long pipe size determines with Lp variables
                malloc    eax
                mov       [long_pipe_vert_ptr],eax  ; mem for long pipe vertices
                virtual   at   ecx
                bmiv      BITMAPINFOHEADER
                end       virtual
                macro do_bmi
                {
                mov       ecx,bmi
                mov       [bmiv.biSize],sizeof.BITMAPINFOHEADER
                movzx     eax,[xres_var]
                mov       [bmiv.biWidth],eax  ;XRES
                movzx     edx,[yres_var]
                ; add       edx,30
                neg       edx
                xor       esi,esi
                mov       [bmiv.biHeight],edx ;-YRES-30
                mov       [bmiv.biPlanes],1
                mov       [bmiv.biBitCount],32
                mov       [bmiv.biCompression],BI_RGB
                mov       [bmiv.biSizeImage],esi
                mov       [bmiv.biXPelsPerMeter],esi
                mov       [bmiv.biYPelsPerMeter],esi
                mov       [bmiv.biClrUsed],esi
                mov       [bmiv.biClrImportant],esi
                }
                do_bmi
                virtual   at   ecx
                Wbmiv     BITMAPINFOHEADER
                end       virtual
                ; fake bitmap bmi
                mov       ecx,Wbmi
                mov       [Wbmiv.biSize],sizeof.BITMAPINFOHEADER
                mov       ebx,2
                mov       [Wbmiv.biWidth],ebx  ;XRES
                neg       ebx
                mov       [Wbmiv.biHeight],ebx ;-YRES-30
                mov       [Wbmiv.biPlanes],1
                mov       [Wbmiv.biBitCount],32
                mov       [Wbmiv.biCompression],BI_RGB
                mov       [Wbmiv.biSizeImage],esi
                mov       [Wbmiv.biXPelsPerMeter],esi
                mov       [Wbmiv.biYPelsPerMeter],esi
                mov       [Wbmiv.biClrUsed],esi
                mov       [Wbmiv.biClrImportant],esi
                virtual   at   ecx
                Mbmiv     BITMAPINFOHEADER
                end       virtual
                macro menu_bmi
                {
                mov       ecx,Mbmi
                mov       [Mbmiv.biSize],sizeof.BITMAPINFOHEADER
                mov       [Mbmiv.biWidth],MXRES
                mov       ebx,MYRES + IN_CNST
                movzx     eax,[yres_var]
                cmp       ebx,eax
                cmova     ebx,eax
                neg       ebx
                mov       [Mbmiv.biHeight],ebx  ;-(MYRES + IN_CNST ) ;-30 ;-BYRES-30
                mov       [Mbmiv.biPlanes],1
                mov       [Mbmiv.biBitCount],32
                mov       [Mbmiv.biCompression],BI_RGB
                xor       eax,eax
                mov       [Mbmiv.biSizeImage],eax
                mov       [Mbmiv.biXPelsPerMeter],eax
                mov       [Mbmiv.biYPelsPerMeter],eax
                mov       [Mbmiv.biClrUsed],eax
                mov       [Mbmiv.biClrImportant],eax
                }
                menu_bmi
                xor       eax,eax
                virtual   at  ecx
                Pbmiv     BITMAPINFOHEADER
                end       virtual
                mov       ecx,Pbmi
                mov       [Pbmiv.biSize],sizeof.BITMAPINFOHEADER
                mov       [Pbmiv.biWidth],PROMPTX
                mov       [Pbmiv.biHeight],-PROMPTY
                mov       [Pbmiv.biPlanes],1
                mov       [Pbmiv.biBitCount],32
                mov       [Pbmiv.biCompression],BI_RGB
                mov       [Pbmiv.biSizeImage],eax
                mov       [Pbmiv.biXPelsPerMeter],eax
                mov       [Pbmiv.biYPelsPerMeter],eax
                mov       [Pbmiv.biClrUsed],eax
                mov       [Pbmiv.biClrImportant],eax
                if 0
                ; dib bmi
                mov       [bmiheader.biSize],sizeof.BITMAPINFOHEADER
                movzx     eax,[xres_var]
                mov       [bmiheader.biWidth],eax
                movzx     eax,[yres_var]
                neg       eax
                mov       [bmiheader.biHeight],eax
                mov       [bmiheader.biPlanes],1
                mov       [bmiheader.biBitCount],32
                mov       [bmiheader.biCompression],BI_RGB
                movzx     eax,[xres_var]
                movzx     ebx,[yres_var]
                imul      eax,ebx
                lea       eax,[4*eax+16]
                mov       [bmiheader.biSizeImage],eax
                mov       [bmiheader.biXPelsPerMeter],0
                mov       [bmiheader.biYPelsPerMeter],0
                mov       [bmiheader.biClrUsed],0
                mov       [bmiheader.biClrImportant],0
                invoke    CreateDIBSection,0,bmiheader,0,screen_ptr,0,0
                end if
                virtual   at   ecx
                rectv     RECT
                end       virtual
                mov       ecx,rect
                xor       ebx,ebx
                mov       [rectv.left],ebx
                mov       [rectv.top],ebx
                movzx     eax,[whole_xres]
                mov       [rectv.right],eax
                movzx     edx,[whole_yres]
                mov       [rectv.bottom],edx
                pop_abi_regs
                invoke    AdjustWindowRectEx,rect,WS_POPUP or WS_SYSMENU or WS_CAPTION,0
                invoke    RegisterClass,wc
                xor       edx,edx
                mov       ecx,[rect.bottom]
                mov       eax,[rect.right]
                sub       ecx,[rect.top]
                sub       eax,[rect.left]
                ; sizable  window
                invoke    CreateWindowEx, edx,_class, _title, WS_VISIBLE + WS_OVERLAPPEDWINDOW,\
                edx, edx,  eax, ecx, NULL, NULL, [wc.hInstance], NULL
                mov       [hwnd],eax
                invoke    GetDC,[hwnd]
                mov       [hdc],eax
                invoke    ShowWindow,[hwnd],SW_SHOW
                push_abi_regs
                cld
                lea       esi,[NextMsub]
                lea       edi,[NextMxadd]
                mov       ecx,17
                rep       movsw
                ; unpack Bezier models
                mov       esi,  bezier_nodes
                mov       edi,  tspoon_nodes
                mov       ecx,  ((256 + 251 + 306 + 56)*3/8) + 1
                mov       eax,  1000
                cvtsi2ss  xmm2, eax
                add       eax,  10
                cvtsi2ss  xmm3, eax
                ; each node is in 10 .. 2020 range, unsigned int         
              .ll3:
                push      ecx
                xor       ecx,  ecx             
                movups    xmm0, [esi]
              @@:
                movd      eax,  xmm0
                shr       eax,  cl
                and       eax,  0x7ff
                cvtsi2ss  xmm1, eax
                subps     xmm1, xmm3     
                divps     xmm1, xmm2
                movss     [edi],xmm1
                add       edi,  4
                psrldq    xmm0, 1
                add       cl,   3
                cmp       cl,   24
                jnz       @b               
                pop       ecx
                add       esi,  11  ; 11 bytes
                loop      .ll3

                mov       esi,  bezier_rects
                mov       edi,  tspoon_rect
                mov       ecx,  (16+26+32+6) * 2  
              .ll2:
                push      ecx
                xor       ecx,  ecx             
                movups    xmm0, [esi]
              .ll:
                movd      eax,  xmm0
                shr       eax,  cl
                and       ax,   0x1ff ; each value 9 bits
                stosw
                psrldq    xmm0, 1
                inc       cl
                cmp       cl,   8
                jnz       .ll               
                pop       ecx
                add       esi,  9
                loop      .ll2
            if 0
                mov       esi,lights1
                mov       edi,lights_aligned
                mov       ecx,3
              .lghts_algn:
                push      ecx
                movsd
                movsd
                movsd
                xor       eax,eax
                stosd
                mov       ecx,3
                xorps     xmm1,xmm1
              @@:
                movups    xmm0,[esi]
                punpcklwd xmm0,xmm1
                cvtdq2ps  xmm0,xmm0
                movups    [edi],xmm0
                add       edi,16
                add       esi,8
                loop      @b
                lodsw
                pop       ecx
                loop      .lghts_algn
            end if
                mov       esi,ffd_nodes
                mov       edi,ffd_nodes_intial
                mov       ecx,56 * 3
                rep       movsd
                call      long_pipe_init  ; init control points of long pipe
                ; dependent on segments count
                movzx     eax,[yres_var]
                shr       eax,1
                cvtsi2ss  xmm0,eax
                movss     [scale],xmm0
                call      normalize_plane_equation
                push      dword   fname
                push      dword   hfile
                push      dword   FileSize
                push      dword   File_ptr
                mov       ecx,ofn
                movups    xmm0,[esp]
                add       esp,16
                call      open_file
                ; mov       eax,'firs'    ; mem hint -> first allocation
                mov       bl,'f'          ; next join hint = off
                call      do_active_object
                call      normalize_all_light_vectors
                mov       edi,lights_aligned
                mov       ecx,3
                call      norm_aligned_lv
                call      do_color_buffer
                call      init_envmap2
                mov       edi,bump_map
                call      calc_bumpmap
                call      generate_texture2
                call      init_point_lights
                mov       ecx,FUR_SIZE
                mov       esi,FUR_SHELLS+1
                mov       edi,[furs_ptr]
                call      fur_shell_generator
                xor       eax,eax
                call      draw_menu
                ; mov       esi,fog_matrices
                ; call      init_fog
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;   msgLoop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
             msg_loop:
                cmp       [lpipe_flag],1
                jnae      @f
                call      pipe_a
              @@:
                call      rend_com
                cmp       [bezier_flag],0
                jne       @f
                cmp       [edit_flag],0
                jne       @f
                cmp       [speed_flag],1
                je        @f
                cmp       [lpipe_flag],0
                jne       @f
                pop_abi_regs
                invoke    Sleep,8
                push_abi_regs
             @@:
                pop_abi_regs
                invoke    PeekMessage,msg,0,0,0,PM_NOREMOVE
                push_abi_regs
                or        eax,eax
                je        msg_loop
                pop_abi_regs
                invoke    GetMessage,msg,NULL,0,0
                push_abi_regs
                or        eax,eax
                je        end_loop
                pop_abi_regs
                invoke    TranslateMessage,msg
                invoke    DispatchMessage,msg
                push_abi_regs
                jmp       msg_loop
                ;.error:
                ; invoke  MessageBox,NULL,_error,NULL,MB_ICONERROR+MB_OK
             end_loop:
                invoke    ReleaseDC,[hwnd],[hdc]
                invoke    DestroyWindow,[hwnd]
                invoke    ExitProcess,[msg.wParam]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; WindowProc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
proc WindowProc uses ebx esi edi, hwnd,wmsg,wparam,lparam
                locals
                 .mxx             rd 40
                 .defwindow       rd 1
                 .coords:         rd 1
                 .end_var         rd 1
                 .calc_combo      rd 1
                 .triangles_ptr   rd 1
                 .points_r_ptr    rd 1
                 .xresd           rd 1
                 .yresd           rd 1
                 .xyres:          rd 1
                 .is_in_main_area rb 1
                endl
           
                mov       eax,.defwndproc
                mov       edx,.end
                mov       ecx,calc_combo
                mov       [.defwindow],eax
                mov       eax,[xres_vard]
                mov       [.end_var],edx
                mov       [.calc_combo],ecx
                mov       [.triangles_ptr],triangles_ptr
                mov       ebx,eax
                mov       [.xyres],eax
                mov       [.points_r_ptr],points_r_ptr
                cwde
                shr       ebx,16
                mov       edx,[lparam]
                mov       [.is_in_main_area],0
                mov       [.xresd],eax
                mov       [.yresd],ebx
                sub       edx,30 shl 16
                mov       [.coords],edx
                xorps     xmm0,xmm0
                movd      xmm1,edx
                movlps    xmm2,[.xyres]
                pcmpgtw   xmm0,xmm1
                pcmpgtw   xmm2,xmm1
                xorps     xmm2,xmm0
                movd      ebx,xmm2
                cmp       ebx,-1
                jne       @f
                mov       [.is_in_main_area],1
                ; cursor in main wnd area
              @@:
                cmp       [wmsg],WM_KEYDOWN
                je        .wmkeydown
                cmp       [wmsg],WM_PAINT
                je        .wmpaint
                cmp       [wmsg],WM_SIZE
                je        .wmsize
                cmp       [wmsg],WM_LBUTTONDOWN
                je        .wmlbuttondown
                cmp       [wmsg],WM_MOUSEMOVE
                je        .wmmousemove
                cmp       [wmsg],WM_LBUTTONUP
                je        .wmlbuttonup
                cmp       [wmsg],WM_RBUTTONDOWN
                je        .wmrbuttondown
                cmp       [wmsg],WM_RBUTTONUP
                je        .wmrbuttonup
                cmp       [wmsg],WM_MOUSEWHEEL
                je        .wmmousewheel
                cmp       [wmsg],WM_DESTROY
                je        .wmdestroy
                jmp       .defwndproc

             .wmrbuttondown:
                cmp       [lpipe_flag],0
                je        @f
                mov       eax,[lparam]
                mov       dword[custom_rot_start_x],eax
             @@:
                mov       [tex_button_pres],1
                mov       eax,[lparam]
                mov       [tex_mov_begin_x],eax
                jmp       [.defwindow]
             .wmrbuttonup:
                xor       eax,eax
                mov       [tex_button_pres],al
                mov       [custom_rot_start_x],ax
             .defwndproc:
                pop_abi_regs
                invoke    DefWindowProc,[hwnd],[wmsg],[wparam],[lparam]
                push_abi_regs
                jmp       .finish
             .wmkeydown:
                cmp       [wparam],VK_UP
                jne       @f
                xor       eax,eax
                jmp       [.defwindow]
             @@:
                xor       ebx,ebx
                cmp       [menu_button_number],bx
                jne       @f
                mov       eax,[wparam]
                call      update_flags_after_keypress
                call      buttons
             @@:
                mov       eax,[wparam]
                cmp       al,'1'
                jne       @f
                call      switch_menu
                xor       eax,eax
                call      draw_menu
                jmp       [.defwindow]
             @@:
                mov       eax,[wparam]
                mov       bl,'m'
                cmp       al,0x4d  ;  m
                je        @f
                cmp       al,0x4e  ;  n
                jne       .no_lo
                mov       bl,'n'
             @@:
                push      eax
                push      ebx
                or        ebx,-1
                call      zero_flags

                push      dword fname
                push      dword hfile
                push      dword FileSize
                push      dword File_ptr
                mov       ecx,ofn
                movups    xmm0,[esp]
                add       esp,16
                call      open_file        ;   next_mesh
                pop       ebx
                ; next   join hint: if bl = 'n' = off
                ;                   if bl = 'm' = on
                call      do_active_object
                pop       eax
                cmp       al,0x4e  ;  n
                jne       @f
                xor       eax,eax
                mov       [NextPointsCount],eax
                mov       [NextTrianglesCount],eax
             @@:
                or        eax,-1
                mov       [curr_chunk],eax
                call      write_info
                xor       eax,eax
                call      draw_menu
                jmp       [.end_var]
             @@:
             .no_lo:
                cmp       [wparam],0x58 ;  'x'
                jne       @f
                push      dword fnametex
                push      dword hfiletex
                push      dword FileSize
                push      dword File_tex_ptr
                mov       ecx,ofntex
                movups    xmm0,[esp]
                add       esp,16
                call      open_file
                mov       esi,[File_tex_ptr]
                mov       ecx,[FileSize]
                or        esi,esi
                jz        @f
                call      read_texture
                call      init_s_tex
             @@:
                cmp       [wparam],VK_ESCAPE
                jne       .end
             .wmdestroy:
                pop_abi_regs
                invoke    PostQuitMessage,0
                push_abi_regs
                xor       eax,eax
                jmp       .end
             .wmmousewheel:
                mov       eax,[lparam]
                sub       eax,PROMPTY  shl 17
                ; modify y coord PROMPTY * 2
                mov       ebx,[wparam]
                shr       ebx,16
                or        edx,-1
                cmp       [edit_particle_no],edx
                je        @f
                add       [edit_start_z],bx
                jmp       .nrm
             @@:
                call      make_bumps
             .nrm:
                call      init_triangles_normals
                call      init_point_normals
                jmp       [.defwindow]
             .wmsize:
                mov       eax,[lparam]
                mov       edx,eax
                shr       edx,16
                cwde
                add       edx,30
            if 0
                cmp       [wmsg],WM_SIZING
                jne       .wmp
                mov       esi,[lparam]
                mov       eax,[esi+8]
                mov       edx,[esi+12]
                add       edx,30
                mov       ecx,[esi]
                mov       ebx,[esi+4]
                mov       [rect.left],ecx
                mov       [rect.top],ebx
                ; movzx     eax,[whole_xres]
                mov       [rect.right],eax
                ; movzx     edx,[whole_yres]
                add       edx,30
                ; neg       edx
                mov       [rect.bottom],edx ;YRES+30
                push      esi
                invoke    AdjustWindowRectEx,rect,WS_POPUP or WS_SYSMENU or WS_CAPTION,0
                pop       esi
                mov       eax,[esi+8]
                mov       edx,[esi+12]
            end if
                mov       edi,minWinX
                cmp       eax,[edi]
                cmovb     eax,[edi]
                cmp       edx,[edi+4]
                cmovb     edx,[edi+4]
                xor       ecx,ecx
                mov       ebx,1
                cmp       [initXsize],eax
                cmovg     ecx,ebx
                cmp       [initYsize],edx
                cmovg     ecx,ebx
                push      ecx
                do_size_vars  ;macro
                pop       ecx
                ; or        ecx,ecx
                ; jz        @f
                ; mov       [initXsize],eax
                ; mov       [initYsize],edx
                ;realloc "screen" mem only if size greater than in start
                ; call      free_screen_mem
                ; alloc_screen_mem  ;macro
             @@:
                do_bmi    ;macro
                menu_bmi
                cls
                jmp       [.defwindow]
              .wmpaint:
                ; redraw all
                invoke    GetClientRect,[hwnd],rect
                invoke    InvalidateRect,[hwnd],rect,FALSE
                invoke    BeginPaint,[hwnd],ps
                mov       [hdc],eax
                invoke    CreateCompatibleDC,eax
                mov       [hMemDC],eax
                invoke    SelectObject,eax,[hBitmap]
                mov       eax,[.xresd]
                mov       edx,[.yresd]
                mov       ebx,[screen_ptr]
                invoke    StretchDIBits,[hdc],0,30,eax,edx,0,0,eax,edx,ebx,bmi,0,SRCCOPY
                invoke    StretchDIBits,[hdc],1,1,PROMPTX,PROMPTY,0,0,PROMPTX,PROMPTY,prompt_screen,Pbmi,0,SRCCOPY
                mov       eax,[.xresd]
                mov       ebx,menu_screen
                mov       ecx,MYRES + IN_CNST
                mov       edx,[.yresd]
                cmp       ecx,edx
                cmova     ecx,edx
                invoke    StretchDIBits,[hdc],eax,0,MXRES,ecx,0,0,MXRES,ecx,ebx,Mbmi,0,SRCCOPY
                call      write_info
                call      NextMdraw_all
                xor       eax,eax
                call      draw_menu
                invoke    DeleteDC,[hMemDC]
                invoke    EndPaint,[hwnd],ps
                jmp       [.defwindow]
              .wmlbuttondown:
      ;          cmp      [tex_flag],1
      ;          jne      @f
      ;          int3
      ;        @@:
                mov       eax,[.coords]
                cmp       [.is_in_main_area],1
                jne       .nxx_edit
                cmp       [sketch_flag],1
                jne       @f
                mov       [sketch_progress],1
              @@:
                xor       edx,edx
                mov       [edit_start_z],dx
                mov       ebx,eax
                shr       ebx,16 ; y
                cwde
                imul      ebx,[.xresd]
                add       ebx,eax
                shl       ebx,2
                add       ebx,[edit_buf_ptr]
                mov       ecx,dword[ebx]
                cmp       ecx,-1
                jz        .nxx_edit
                mov       eax,ecx
                inc       ecx    ; prevent long pipe cause ?
                mov       edx,ffd_flag
                cmp       [edx],byte 0
                jne       .f2
                cmp       [bezier_flag],0
                jne       .f2
             @@:
                mov       [edit_particle_no],ecx
                ; if vert_edit_no = 0, no vertex selected
                xor       edx,edx
                mov       [curr_chunk],eax
                jmp       .f1
             @@:
             .f2:
                dec       ecx
                mov       [ed_bpatch.derive_no],cx
             .f1:
                movzx     edx,[chunks_o_flag]
                or        edx,edx
                jz        .nxx_edit
                cmp       dl,7
                je        .rem_tri
                cmp       eax,[chunks_count]
                jg        .defwndproc
                cmp       dl,3
                je        .mirr
                cmp       dl,5
                je        .geom
                cmp       dl,6
                je        .merg
                cmp       dl,8
                je        .inV
                cmp       dl,9
                je        .del_front_tris_ch
                cmp       dl,2
                je        .tess
                cmp       dl,1
                je        .inver
                cmp       dl,4
                je        .delet
                jmp       [.defwindow]
             .del_front_tris_ch:
                cmp       [inner_vert_ptr],0
                je        .defwndproc
                ; Del tris from curr chunk,
                ; tris with all vertices marked as outside.
                dec       ecx      ; = curr chunk
                call      rem_tris_not_inner_vert
                call      remove_non_tri
                call      write_info
                jmp       .prep_c
             .inV:
                mov       ebx,inner_vert_ptr
                xor       eax,eax
                cmp       [ebx],eax
                je        @f
                mfree     [ebx]
                xor       eax,eax
                mov       [ebx],eax
             @@:
                mov       ax,'ic'
                ; mark inner verts in separate chunk
                dec       ecx      ; = curr chunk
                call      mark_inner_vert
                mov       [inner_vert_number],ecx
                ; if vertex is insde adjacent bit is zeroed
                mov       [inner_vert_ptr],ebx
                call      write_info
                jmp       .prep_c
             .delet:
                call      delete_chunk
                jmp       .prep_c
             .mirr:
                call      mirror_chunk
                jmp       .prep_c
             .inver:
                ; eax - ch no
                call      invert_normals_ch     ; 3
                jmp       .prep_c
             .rem_tri:
                cmp       eax,[triangles_count_var]
                jg        .defwndproc
                mov       ebx,1
                call      rem_tri
                jmp       .prep_c
             .geom:
                push      eax  ;  eax = ch no
                mov       bl,[opt_mesh_flag]
                mov       al,'h'         ;    al = 'h'   - hole filling function
                mov       cl,'m'         ;    al = 'm'   - improve geometry of mesh
                mov       dl,'e'         ;    al = 'e'   - try collapse edges
                cmp       bl,1           ;                 now collapse ed option only preform
                cmove     ax,cx          ;                 massive edges decreasing
                cmp       bl,2
                cmove     ax,dx
                cmp       bl,3
                mov       bl,'c'
                pop       edx  ; edx = chu No.
                call      opt_object2
                call      remove_non_tri
                mov       eax,[triangles_ptr]
                mov       ebx,[points_r_ptr]
                mov       ecx,[triangles_count_var]
                mov       edx,[points_count_var]
                call      remove_unused_vertices
                mov       [points_count_var],ecx
                call      calc_combo
                jmp       .prep_c
             .merg:
                push      eax
                movzx     ebx,[tolerancy_flag]
                inc       ebx
                shl       ebx,1
                ; mov       ecx,ebx
                ; shl       ebx,cl
                cvtsi2ss  xmm0,ebx
                movd      eax,xmm0
                movzx     edx,[disp_fac_flag]
                mov       ecx,edx
                add       ecx,ecx
                shl       edx,cl
                cvtsi2ss  xmm0,edx
                mulps     xmm0,[eps2]           ; 0.000002
                shufps    xmm0,xmm0,11000000b   ; normal vector tolerancy
                mov       ebx,11b
                mov       ecx,111b
                mov       edx,Z_care_flag
                cmp       [edx],byte 0
                cmovne    ebx,ecx    ; button to non chck if in teslate are to do!!
                xor       ecx,ecx
                cmp       [edx],byte 2
                jne       @f
                bts       ecx,3      ;
               @@:
                ; bts      ecx,0    ; check if in teslate area
                ; bts      ecx,1    ; chck normal vect of vert
                bts       ecx,2      ; one chunk merging - edx =  chu No
                pop       edx
                call      remove_redundant_vert_ch    ; merge verts
             .prep_c:
                call      remove_non_tri
                mov       eax,[triangles_ptr]
                mov       ebx,[points_r_ptr]
                mov       ecx,[triangles_count_var]
                mov       edx,[points_count_var]
                call      remove_unused_vertices
                mov       [points_count_var],ecx
                call      [.calc_combo]
                jmp       [.defwindow]
             .tess:
                mov       ebx,'chun'
                mov       ecx,eax  ; ecx = eax = [curr_chunk]
                ; tesselate tris from selected chunk
                call      triangulize_all_faces
                ; out:    esi - new triangles ptr
                ;         edi - new vertices ptr
                ;         ecx - new vertices count
                ;         ebx - new tris count
                mov       edx,ecx
                mov       ecx,ebx
                mov       eax,esi
                mov       ebx,edi
                push      esi  ecx
                call      remove_unused_vertices
                mov       [points_count_var],ecx
                mfree     [triangles_ptr]
                pop       [triangles_count_var]  [triangles_ptr]
                mov       eax,'notp'   ; not free tp
                call      free_mem_for_tp
                xor       eax,eax      ; no alloc for tp
                call      alloc_mem_for_tp
                mov       eax,'firs'   ; no mem work
                call      calc_combo
                mov       ebx,matrix_scaled  ; mx unchanged
                call      rotate_points ;
                call      translate_points
                call      remove_non_tri
                xor       eax,eax
                call      detect_chunks
                mov       [chunks_ptr],ebx
                mov       [chunks_count],ecx
                call      sort_chunks
                call      do_edges_list     ; sort and detect chu inside
                xor       eax,eax
                jmp       [.defwindow]
             .nxx_edit:
                cmp       [ed_bpatch.derive_on],0
                jne       .no_edit_derives
                mov       ebx,[lparam]
                mov       [ed_bpatch.derives_x_start],ebx
                mov       [ed_bpatch.derives_x_end],ebx
                mov       word[ed_bpatch.derive_on],1
             .no_edit_derives:
                cmp       [NextMed_flag],0   ; next mesh edition routines
                je        .pipe
                mov       ecx,[next_var]
                shl       ecx,16
                mov       cx,[.xyres]
                mov       edx,BYRES shl 16 + BXRES
                add       edx,ecx
                movss     xmm0,[lparam]
                movd      xmm1,ecx
                movd      xmm2,edx
                pcmpgtw   xmm1,xmm0
                pcmpgtw   xmm2,xmm0
                xorps     xmm2,xmm1
                movd      edx,xmm2
                cmp       edx,-1
                jne       .pipe
                movd      xmm1,ecx
                psubw     xmm0,xmm1
                movd      ebx,xmm0
                mov       eax,ebx
                mov       edx,BYRES
                cwde
                shr       ebx,16
                imul      ebx,edx
                add       eax,ebx
                shl       eax,2
                add       eax,[bbuffer_ptr] ;Bbuffer
                mov       ebx,[eax]
                shr       ebx,24
                cmp       bl,cl
                jz        .pipe
                ; cmp  ebx ->  ; 1 - xy, 2 - z, 3 - scale, 4 - rotate,
                ; 5- scale xy, 6, 7,   8, 9 - bend
                mov       [NextMotion],bx
             .pipe:
                cmp       [lpipe_flag],1
                jae       .editt
                cmp       [rotary_flag],3
                jne       @f
                mov       eax,[lparam]
                mov       dword[custom_rot_start_x],eax
             @@:
                cmp       [set_tri_area_flag],0
                je        .editt
                cmp       [edit_flag],0
                je        .no_edit
             .editt:
                cmp       [.is_in_main_area],1
                jnz       .no_edit
                mov       eax,[.coords] ;,eax   ; menu coords
                ; eax   - position of cursor in "screen/work" area
                cmp       [set_tri_area_flag],0
                jne        ..edit_mesh ; teslation area work
                movzx     ebx,ax   ; check if edition triangulize area
                mov       ecx,eax
                shr       ecx,16
                mov       edx,[.xresd]
                imul      edx,ecx
                add       ebx,edx
                mov       ecx,ebx
                shr       ebx,2
                and       ecx,11b
                add       ecx,ecx
                inc       ecx
                mov       edi,edit_tri_area
                add       ebx,[edit_tri_area_ptr]
                bt        [ebx],ecx
                jnc       @f
                mov       dword[edi],1
                jmp       .no_edit
             @@:
                inc       ecx
                bt        [ebx],ecx
                jnc       @f
                mov       dword[edi],2
                jmp       .no_edit
             @@:
             ..edit_mesh:
                mov       edi,edit_particle_no
                mov       eax,[.coords]
                cmp       [edit_flag],2
                jne       @f
                cmp       dword[edi],-1
                je        @f
                dec       dword[edi]
             @@:
                cld
                mov       edi,edit_start_x
                stosd
                stosd
                ; edit_start/end_xyz
             .no_edit:
                mov       eax,[lparam]        ; menu work
                mov       ecx,[.xresd]
                cmp       ax,cx
                jna       .defwndproc
                add       ecx,MXRES
                cmp       ax,cx
                jnb       .defwndproc
                ror       eax,16
                cmp       ax,MYRES
                jnl       .defwndproc
                cmp       [menu_button_number],0
                jne       .defwndproc
                mov       esi,menu_data
                mov       eax,[lparam]
                sub       ax,[whole_xres]
                add       ax,MXRES  ;+MXPRES+10
             @@:
                push      eax
                mov       ecx,[esi+1]
                mov       edx,[esi+5]
                call      xy_in_rect
                pop       eax
                or        ebx,ebx
                jnz       @f
                add       esi,menu_data.op      ; size of one position in menu
                cmp       [esi],word -1         ; end mark
                jnz       @b
                jmp       .st
             @@:
                push      dword 2 shl 16 + 2
                movlps    xmm2,[esi+1]
                push      dword -2 shl 16 -2
                movlps    xmm1,[esp]
                psubw     xmm2,xmm1

                pshuflw   xmm2,xmm2,11011000b
                movlps    [esp],xmm2
                pop       eax ebx

                mov       edx,MXRES
                push      dword MYRES
                push      edx
                movlps    xmm7,[esp]
                add       esp,8

                push      dword menu_screen
                push      dword 0x0000ff00
                push      edx

                movups    xmm4,[esp]
                add       esp,12
                xorps     xmm6,xmm6
                push      esi
                ;write_frame:
                ; xmm1 -  buffer ptr
                ; xmm2 -  width
                ; xmm3 -  color
                ; eax  -    x2 shl 16 + x1
                ; ebx  -  y2 shl 16 + y1
                ; xmm6 -  min x, min y
                ; xmm7 -  max x, max y
                call      write_frame
                pop       esi
                mov       byte[esi],1      ; mark button chosen
                or        eax,-1           ; dont clear menu
                call      draw_menu
             .st:
                stretch_menu
                jmp       [.defwindow]
             .wmmousemove:
                cmp       [sketch_progress],1
                jne       @f
                cmp       [.is_in_main_area],1
                jne       @f
                mov       eax,[.coords]
                mov       ebx,eax
                shr       ebx,16
                cwde
                imul      ebx,[.xresd]
                add       ebx,eax
                shr       ebx,3
                and       eax,111b
                add       ebx,[sketch_buff_ptr]
                bts       [ebx],eax
                ; set bit pixel in sketch_buff
              @@:
                cmp       [lpipe_flag],1
                jle       @f
                call      call_l_pipe
              @@:
                stretch_menu
                cmp       [ed_bpatch.derive_on],1
                jne       .Nexf
                push      [lparam]
                pop       dword[ed_bpatch.derives_x_end]
             .Nexf:
                cmp       [NextMed_flag],0   ; next mesh edition routines
                je        .l_pipe
                movzx     ebx,[NextMotion]
                or        ebx,ebx
                jz        .l_pipe
                cmp       ebx,9
                jg        .l_pipe
                mov       eax,[lparam]
                sub       ax,[.xyres]
                ror       eax,16
                sub       ax,[next_varw]
                ror       eax,16
                cmp       ebx,1
                mov       edx,NextMxadd
                jne       @f
                mov       [edx],eax
                jmp       .callM
             @@:
                cmp       ebx,2
                jne       @f
                shr       eax,16
                mov       [edx+NXAZ],ax   ; add Z
                jmp       .callM
             @@:
                cmp       ebx,3
                jne       @f
                shr       eax,16
                mov       [edx+NXSC],ax   ; scale
                jmp       .callM
             @@:
                cmp       ebx,4
                jne       @f
                mov       [edx+NXRT],eax  ; rotate
                jmp       .callM
             @@:
                cmp       ebx,5
                jne       @f
                mov       [edx+NXSXY],eax ; scale XY
                jmp       .callM
             @@:
                cmp       ebx,10          ; bend
                jg        .l_pipe
                lea       edi,[edx+NXB1]
                sub       ebx,6
                shl       ebx,2
                add       edi,ebx
                mov       [edi],eax
             .callM:
                call      write_info
                call      NextMdraw_all
                xor       eax,eax
                call      draw_menu
                cmp       [wave_flag],1
                jne       .defwndproc
                mov       dl,'d'
                xor       ebx,ebx  ; ptr to wav data is empty
                                   ; alloc mem for wav data
                mov       eax,16   ; bits number
                call      save_wav
                mov       [wave_ptr],edx
                mov       [wave_end_ptr],edi
                ; stretch_menu
                jmp       [.defwindow]
             .l_pipe:
                cmp       [lpipe_flag],0
                ja        @f
                cmp       [NextMed_flag],0
                jne       @f
                cmp       [rotary_flag],3
                jne       .tri_rect
             @@:
                cmp       [custom_rot_start_x],0
                je        .tri_rect
                movss     xmm0,[lparam]
                movlps    xmm1,qword[custom_rot_start_x]
                psubw     xmm0,xmm1
                movss     [custom_obj_angle],xmm0
             .tri_rect:
                cmp       [edit_tri_area],0
                je        @f
                movlps    xmm3,[min_work_x]  ; teslate work
                movlps    xmm4,[max_work_x]
                movlps    xmm1,[.coords]
                pmaxsw    xmm1,xmm3
                pminsw    xmm1,xmm4
                movaps    xmm0,xmm1
                xorps     xmm1,xmm1
                punpcklwd xmm0,xmm1
                cvtdq2ps  xmm0,xmm0
                mov       eax,tri_area_x1
                cmp       [edit_tri_area],2
                jnz       @f
                movlps    qword[eax],xmm0
                jmp       .end_tr
             @@:
                cmp       [edit_tri_area],1
                jnz       .end_tr
                movlps    qword[eax+8],xmm0
             .end_tr:
                cmp       [tex_button_pres],1
                jne       .tx_dn
                movss     xmm0,[lparam]
                movss     xmm1,[tex_mov_begin_x]
                psubw     xmm1,xmm0
                movd      eax,xmm1
                movss     [tex_mov_begin_x],xmm0
                mov       ebx,eax
                sar       ebx,16
                cwde
                call      move_texture
                jmp       [.defwindow]
             .tx_dn:
                mov       ebx,min_work_x
                or        eax,-1
                cmp       [edit_particle_no],eax
                jz        .next_one
                movlps    xmm2,[ebx]
                movlps    xmm3,[ebx+4]
                movlps    xmm0,[.coords]
                pmaxsw    xmm0,xmm2
                pminsw    xmm0,xmm3
                movss     dword[edit_end_x],xmm0
             .next_one:
                jmp       [.defwindow]
             .wmlbuttonup:
                cmp       [sketch_progress],1
                jne       @f
             ;   call      find_v_sketch
                call      sketch_3d
                xor       eax,eax   ; free tp mem
                call      calc_combo
             ;   call      flood_sketch
                mov       [sketch_progress],2
              @@:
             ;   call      flood_sketch
             ;   inc       [sketch_progress]
                xor       eax,eax
                call      draw_menu
                cmp       [ed_bpatch.derive_on],1
                jne       .no_bez_submit
             @@:
                cmp       [ffd_flag],3
                jne       @f
                mov       edi,Def_Derv
                mov       esi,Def_Derv_copy
                cld
                mov       ecx,2000 ; will be enough
                rep       movsd
                jmp       .ffd1
             @@:
                mov       esi,matrix_scaled
                lea       edi,[.mxx]
                call      reverse_mx_3x3
                movzx     eax,[ed_bpatch.derive_no]
                imul      eax,12
                mov       edi,ffd_nodes
                mov       esi,ffd_nodes_rot
                movzx     edx,[ffd_flag]
                cvtdq2ps  xmm3,[xxadd]
                cmp       dl,1
                jne       .f_cage
             .one_patch:
                rcpps     xmm1,[ffd_scale]
                movups    xmm0,[esi+eax]
                subps     xmm0,xmm3
                subps     xmm0,[ffd_center]
                shufps    xmm1,xmm1,0
                mulps     xmm0,xmm1
                movlps    [edi+eax],xmm0
                movhlps   xmm0,xmm0
                movss     [edi+eax+8],xmm0
                jmp       .ffd1
             @@:
             .f_cage:
                cmp       [bezier_flag],0    ; edit bezier patch
                cmovne    edi,[derv_nodes_ptr]
                ; mov      edi,[ffd_nodes_intial_rot]
                ;.spli_deform:
                add       edi,eax
                add       esi,eax
                movups    xmm0,[esi]
                ; subps    xmm0,xmm3
                ; movss    xmm2,[scale]
                ; shufps   xmm2,xmm2,0
                cvtdq2ps  xmm3,[xxadd]
                ; mulps    xmm3,xmm2
                ; rcpps    xmm2,xmm2
                ; movaps   xmm2,[ffd_scale]
                ; mulps    xmm2,xmm3
                ; shufps   xmm2,xmm2,0
                ; mulps    xmm0,xmm2
                subps     xmm0,xmm3
                ; mulps    xmm0,xmm2
                movlps    [esi],xmm0
                movhlps   xmm0,xmm0
                movss     [esi+8],xmm0
                ; movlps   [edi],xmm0
                ; movhlps  xmm0,xmm0
                ; movss    [edi+8],xmm0
                mov       ecx,1
                lea       ebx,[.mxx]
                call      rotary
             .ffd1:
                xor       eax,eax
                mov       [ed_bpatch.derive_on],al
                mov       [ed_bpatch.derives_x_start],eax
                mov       [ed_bpatch.derives_x_end],eax
             .no_bez_submit:
                xor       eax,eax
                mov       [NextMotion],ax   ; next mesh x y
                cmp       [lpipe_flag],0
                jne       @f
                mov       [custom_rot_start_x],ax
              @@:
                mov       [edit_tri_area],al
                cmp       [edit_particle_no],-1    ;0   last_chg
                je        .segment_rot
                mov       al,[lpipe_flag]
                or        al,al
                jz        .no_lp1
                mov       esi,long_pipe_derv
                mov       edi,long_pipe_derv_rotated
                call      edit_submit
                mov       eax,[tex_scale]
                call      calc_bumpmap_coords  ; bump , tex
                or        ebx,-1
                xor       eax,eax
                cld
                mov       [edit_particle_no],ebx
                mov       edi,edit_start_x
                stosd
                stosd
                jmp       .segment_rot
             .no_lp1:
                mov       [do_submit],1      ; submit after rendering
             .segment_rot:
                ; GUI buttons work
                cmp       [menu_button_number],0
                jne       .defwndproc
                mov       eax,[lparam]     ; check if sth in menu marked
                sub       ax,[whole_xres]
                cmp       ax,-MXRES ;+MXPRES+10)
                jl        .dr_mn
                ror       eax,16
                cmp       ax,MYRES
                jg        .dr_mn
                ror       eax,16
                mov       esi,menu_data
                mov       eax,[lparam]
                sub       ax,[whole_xres]
                add       ax,MXRES
             @@:
                push      eax
                mov       ecx,[esi+1]
                mov       edx,[esi+5]
                call      xy_in_rect
                pop       eax
                or        ebx,ebx
                jnz       @f
                add       esi,menu_data.op              ; size of one position in menu
                cmp       [esi],byte -1
                jnz       @b
                jmp       .dr_mn
             @@:
                cmp       byte[esi],1
                jne       .dr_mn
                ; this    button was pressed (pointed in esi)
                ; update flags
                movzx     eax,word[esi+menu_data.butt_no - menu_data]
                mov       dl,[eax+menu_flags-1]
                inc       dl
                cmp       dl,[esi+menu_data.max_flag - menu_data]
                jne       @f
                xor       dl,dl
             @@:
                xor       ebx,ebx
                cld
                mov       edi,menu_button_number
                cmp       [edi],bx
                jne       @f
                stosw
                mov       [eax+menu_flags-1],dl
                xor       eax,eax
                call      draw_menu
                call      buttons
                jmp       [.defwindow]
             @@:
             .dr_mn:
                xor       eax,eax
                call      draw_menu
                jmp       [.defwindow]
             .finish:
             .end:
ret
endp

buttons:
;===================================================================
;===================================================================
;======================== menu GUI buttons==========================
;===================================================================
;===================================================================
                push        ebp
                mov         ebp,esp
                sub         esp,128
                .end_var                 equ dword[ebp-4]
                .calc_combo              equ dword[ebp-8]
                .points_count_var        equ dword[ebp-48]
                .triangles_count_var     equ dword[ebp-44]
                .points_r_ptr            equ dword[ebp-40]
                .triangles_ptr           equ dword[ebp-36]
                .points_rotated_ptr      equ dword[ebp-32]
                .pnr                     equ dword[ebp-28]
                .edges_ptr               equ dword[ebp-24]
                .write_info              equ dword[ebp-52]
                .remove_unused_verticesB equ dword[ebp-56]
                .remove_non_tri          equ dword[ebp-60]
                .prepare_object          equ dword[ebp-64]
                .calc_bumpmap_coords     equ dword[ebp-68]
                .tex_scale               equ dword[ebp-72]


                .tex_flag          equ byte [ebp-127]
                .save_flag         equ byte [ebp-126]
                .process_flag      equ byte [ebp-125]
                .culling_flag      equ byte [ebp-124]
                                 ; equ byte [ebp-123]
                                 ; equ byte [ebp-122]
                .hrt_flag          equ byte [ebp-121]
                .xchg_flag         equ byte [ebp-120]
                .edit_flag         equ byte [ebp-119]
                .set_tri_area_flag equ byte [ebp-118]
                .disp_col_flag     equ byte [ebp-117]
                .disp_fac_flag     equ byte [ebp-116]
                .bezier_flag       equ byte [ebp-115]
                .tolerancy_flag    equ byte [ebp-114]
                .derive_flag       equ byte [ebp-113]
                .td_wp_flag        equ byte [ebp-112]
                .speed_flag        equ byte [ebp-111]
                                 ; equ byte [ebp-110]
                .lpipe_flag        equ byte [ebp-109]
                .lpsegs_flag       equ byte [ebp-108]
                .lpcurve_tp        equ byte [ebp-107]
                .to_piec_flag      equ byte [ebp-106]
                .NextMed_flag      equ byte [ebp-105]
                                 ; equ byte [ebp-104]
                .normals_flag      equ byte [ebp-103]
                .Z_care_flag       equ byte [ebp-102]
                                 ; equ byte [ebp-101]
                .mark_coll_ed_flag equ byte [ebp-100]
                .s_tex_flag        equ byte [ebp-99]
                                 ; equ byte [ebp-98]
                                 ; equ byte [ebp-97]
                .chunks_o_flag     equ byte [ebp-96]
                                 ; equ byte [ebp-95]; crop fr
                .stencil_s_flag    equ byte [ebp-94]
                                 ; equ byte [ebp-93]
                                 ; equ byte [ebp-92]
                .inner_vert_flag   equ byte [ebp-91]
                                 ; equ byte [ebp-90]
                                 ; equ byte [ebp-89]
                                 ; equ byte [ebp-88]
                .tes_mod_flag      equ byte [ebp-87]

            ;    .show_ch_flag     equ byte [ebp-86]   \
                                 ; equ byte [ebp-85]    |
                                 ; equ byte [ebp-84]    |
                                 ; equ byte [ebp-83]    | not
                                 ; equ byte [ebp-82]    | used
                                 ; equ byte [ebp-81]    |
            ;    .submit_flag      equ byte [ebp-80]    |
            ;    .zero_Nx_flag     equ byte [ebp-79]    |
            ;    .ccounter_inc     equ byte [ebp-78]    |
            ;    .shadow_flag      equ byte [ebp-77]   /
                .ffd_flag          equ byte [ebp-86]
                .rph_bump_flag     equ byte [ebp-85]

                                 ; equ byte [ebp-84]
                .valencEd_flag     equ byte [ebp-83]
                .from_tex_flag     equ byte [ebp-82]
                .opt_mesh_flag     equ byte [ebp-81]
                                 ; equ byte [ebp-80]
                .morph_flag        equ byte [ebp-79]
                .wave_flag         equ byte [ebp-78]

                mov      eax,[tex_scale]
                mov      .end_var,.end
                mov      .calc_combo,calc_combo
                mov      .write_info,write_info
                mov      .remove_unused_verticesB,remove_unused_vertices
                mov      .remove_non_tri,remove_non_tri
                mov      .prepare_object, prepare_object
                mov      .calc_bumpmap_coords,calc_bumpmap_coords
                mov      .tex_scale,eax
                cld
                lea      esi,[points_count_var]
                lea      edi,.points_count_var
                mov      ecx,7
                rep      movsd
                mov      esi,tex_flag
                lea      edi,.tex_flag
                mov      ecx,11
                rep      movsd
                mov      esi,ffd_flag
                lea      edi,.ffd_flag
                or       ecx,9
                rep      movsb
                movzx    edx,[menu_button_number]
                or       edx,edx
                jz       .end
                dec      edx
                add      edx,edx
                lea      esi,[menu_jmp_table_ptr]
                movzx    edx,word[esi+edx]
                or       edx,edx
                jz       .end
                add      edx,buttons
                jmp      edx
             .nx71:
                mfree    [sketch_buff_ptr]
                xor      eax,eax
                mov      [sketch_buff_ptr],eax
                cmp      [sketch_flag],1
                jne       .end
                mov      [points_count_var],0
                mov      [triangles_count_var],0
                mfree    [triangles_ptr]
                mfree    [points_r_ptr]
                mov      eax,xres_var
                movzx    ebx,word[eax]
                movzx    ecx,word[eax+2]
                ; one bit per pix
                imul     ebx,ecx
                push     ebx
                shr      ebx,3
                push     ebx
                add      ebx,16
                malloc   ebx
                mov      [sketch_buff_ptr],eax
                mov      edi,eax
                xor      eax,eax
                pop      ecx
                inc      ecx
                shr      ecx,2
                cld
                rep      stosd
                jmp      .end_var
             .nx70:
                mov      eax,3
                ; tesselate
                ; take intersection point as new vertex
                call     mark_colided_edges
                call     remove_non_tri
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .calc_combo
                jmp      .end_var
             .nx69:
                xor      eax,eax
                cmp      [inner_vert_ptr],eax
                je       .end
                call     rem_TIV_all
                call     remove_non_tri
                xor      edx,edx
                cmp      [inner_vert_ptr],edx
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     remove_unused_vertices
                call     .prepare_object
                jmp      .end_var
             .nx68:
                cmp      .wave_flag,0
                jne      @f
                mfree    [wave_ptr]
                jmp      .end_var
              @@:
                mov      dl,'d'
                xor      ebx,ebx  ; ptr is empty
                                  ; alloc mem for wav data
                mov      eax,16   ; bits number
                call     save_wav
                mov      [wave_ptr],edx
                mov      [wave_end_ptr],edi
                jmp      .end_var
             .nx67:
                cmp      .morph_flag,0
                jne      @f
                call     do_active_object
              @@:
                jmp      .end_var
             .nx66:
                mov      bl,.opt_mesh_flag
                mov      di,'cc'
                mov      al,'h'          ;    al = 'h'   - hole filling function
                mov      cl,'m'          ;    al = 'm'   - improve geometry of mesh
                mov      dl,'e'          ;    al = 'e'   - try collapse edges
                cmp      bl,1            ;                 now collapse ed option only preform
                cmove    ax,cx           ;                 massive edges decreasing
                cmp      bl,2            ;    al = 'c'   - remove clenched triangles
                cmove    ax,dx
                cmp      bl,3
                cmove    ax,di
                call     opt_object2
                call     .remove_non_tri
                mov      eax,[triangles_ptr]
                mov      ebx,.points_r_ptr
                mov      ecx,[triangles_count_var]
                mov      edx,.points_count_var
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                ; mov    eax,'opt'
                ; call   do_edges_list_ch    ; sort and detect chu inside
                ; call   .prepare_object
                call     .calc_combo
                jmp      .end_var
             .nx42:
                cmp      .stencil_s_flag,0
                jne      .st_alloc
                xor      al,al   ; al = 0 -> mfree
                call     re_alloc_stenc_shadows
                jmp      .end_var
             .st_alloc:
                mov      al,1  ; al = 1 -> malloc
                call     re_alloc_stenc_shadows
                jmp      .end_var
             .nx55:
                call     .remove_non_tri
                mov      eax,.triangles_ptr
                mov      ebx,.points_r_ptr
                mov      ecx,[triangles_count_var]
                mov      edx,.points_count_var
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                ; sort and detect chu inside
                call     sort_chunks
                call     opt_chunks
                call     do_edges_list
                mov      eax,.tex_scale
                call     .calc_bumpmap_coords
                jmp      .end_var
             .nx54:
                call     make_series
                call     .calc_combo
                jmp      .end_var
             .nx53:
                call     fix_normals
                call     .prepare_object
                jmp      .end_var
             .nx52:
                mov      eax,tex_scale
                movlps   xmm0,[eax]
                mulss    xmm0,[scale_plus]
                movss    [eax],xmm0
                movd     eax,xmm0
                call     .calc_bumpmap_coords
                jmp      .end_var
             .nx51:
                mov      eax,tex_scale
                movlps   xmm0,[eax]
                mulss    xmm0,[scale_minus]
                movss    [eax],xmm0
                movd     eax,xmm0
                call     .calc_bumpmap_coords
                jmp      .end_var
             .nx48:
                ; try to remove cracks
                ; Fix sitution when in the middle of edge ocurr vertex.
                ; This cause arise when we have wrong declaration
                ; of triangles in tris net. In my implementation some numeric
                ; errors could arise :(
                ; To do - not only mid vertex calculation, but every vert along edge.
                ; I tried do above - numerical errors I cannot accept..
                or       ebx,-1
                call     zero_flags  ; zero colided edges and chunks count
                call     rem_cracks
                call     .calc_combo
                call     .write_info
                or       eax,-1
                call     draw_menu
                jmp      .end_var
             .nx47:
                call     .remove_non_tri
                mov      eax,.triangles_ptr
                mov      ebx,.points_r_ptr
                mov      ecx,[triangles_count_var]
                mov      edx,.points_count_var
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .prepare_object
                or       ebx,-1
                call     zero_flags  ; zero colided edges and chunks count
                call     .write_info
                jmp      .end_var
             .nx46:
                cmp      .mark_coll_ed_flag,0
                je       .end
                cmp      .inner_vert_flag,0
                je       .end

                mov      ebx,[edges_intersect_ptr]
                mov      esi,[inner_vert_ptr]
                or       esi,esi
                jz       .end
                ; delete triangles with inside vertices and without intersecting edges
                ; in  esi - ptr to inside vertices mask list
                ;     ebx - ptr to intersecting edges mask list
                call     del_tiv_ie_without
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .prepare_object
                or       ebx,-1
                call     zero_flags  ; zero colided edges and chunks count
                call     .write_info
                jmp      .end_var
             .nx45:
                xor      eax,eax
                ; inside vertices
                cmp      .inner_vert_flag,al
                je       .end
                cmp      [inner_vert_ptr],eax
                je       @f
                mfree    [inner_vert_ptr]
                mov      [inner_vert_ptr],0
             @@:
                call     mark_inner_vert
                mov      [inner_vert_number],ecx
                ; if vertex is insde adjacent bit is zeroed
                mov      [inner_vert_ptr],ebx
                call     .write_info
                jmp      .end_var
             .nx44:
                ; teselate triangles with at last one inner vertex
                ; and with edge that intersect
                ; in: esi - ptr to inside vertices mask list
                ;     ebx - ptr to intersecting edges mask list
                ;     ecx - colided edges number
                ;     edx - inner vertices number
                xor      ebx,ebx
                cmp      [inner_vert_ptr],ebx
                je       .end
                cmp      [edges_intersect_ptr],ebx
                je       .end
                ; mov      esi,[inner_vert_ptr]
                ; mov      edx,[edges_intersect_ptr]
                ; mov      ebx,[edges_coll_count]
                ; mov      ecx,[inner_vert_number]
                ; xor      eax,eax
                ; or       eax,100b
                mov      eax,'tiv'
                call     triangulize_all_faces
                ; out:      esi - new triangles ptr
                ;           edi - new vertices ptr
                ;           ecx - new vertices count
                ;           ebx - new tris count
                mov      edx,ecx
                mov      ecx,ebx
                mov      eax,esi
                mov      ebx,edi
                push     esi ecx
                call     remove_unused_vertices
                mov      [points_count_var],ecx
                mfree    [triangles_ptr]
                pop      [triangles_count_var]  [triangles_ptr]
                mov      eax,'notp'   ; not free tp
                call     free_mem_for_tp
                xor      eax,eax      ; no alloc for tp
                call     alloc_mem_for_tp
                mov      eax,'firs'   ; no mem work
                call     calc_combo
                mov      ebx,matrix_scaled  ; mx unchanged
                call     rotate_points     ;
                call     translate_points
                call     remove_non_tri

                xor      eax,eax
                call     detect_chunks
                mov      [chunks_ptr],ebx
                mov      [chunks_count],ecx
                call     sort_chunks

                call     do_edges_list      ; sort and detect chu inside
                ; mov      [edges_count],ecx
                call     rem_cracks
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                ; call   .remove_non_tri
                ; call   zero_flags  ; zero colided edges and chunks count
                ; call   .write_info
                ; call   draw_menu
                jmp      .end_var
             .nx41:
                ;  crop front faces
                mov      edi, .points_rotated_ptr
                mov      ecx, .points_count_var
                call     normalize_object
                or       ebx,-1
                mov      edi,[points_rotated_ptr]
                call     rm_inner_faces_th
                ; virtual at edx
                ; ll =      ptr_lab
                ; end      virtual
                ;     mov     edx, ptr_lab
                ; edx equ
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .calc_combo
                jmp      .end_var
             .nx39:
                ; clip  faces
                call     clip_triangles  ;  clipping faces in teslate area
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .calc_combo
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                jmp      .end_var
             .nx38:
                ; smooth inside edges
                mov      eax,1   ;   smooth inside edges, 1st algo
                call     mark_colided_edges  ;  try to smooth inside edges
                call     .calc_combo
                jmp      .end_var
             .nx43:
                mov      eax,2 ; smooth inside edges, sec algo
                call     mark_colided_edges
                call     .calc_combo
                jmp      .end_var
             .nx35:
                ; remove  inner faces
                mov      edi,.points_r_ptr
                call     rm_inner_faces
                mov      eax,[triangles_ptr]
                mov      ebx,[points_r_ptr]
                mov      ecx,[triangles_count_var]
                mov      edx,[points_count_var]
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .calc_combo
                jmp      .end_var
           ;  .nx37:

             .nx36:
                ; detect intersecting edges
                cmp      [edges_intersect_ptr],0
                je       @f
                mfree    [edges_intersect_ptr]
                xor      eax,eax
                mov      [edges_coll_count],eax
                mov      [edges_intersect_ptr],eax
             @@:
                cmp      .mark_coll_ed_flag,0
                je       .end
                mov      ebx,[edges_count]
                shr      ebx,3
                add      ebx,32
                malloc   ebx
                mov      [edges_intersect_ptr],eax
                mov      edi,eax
                mov      ecx,ebx
                shr      ecx,2

                xor      eax,eax
                mov      [edges_coll_count],eax
                xor      eax,eax   ; - no smooth inside edges
                call     mark_colided_edges  ; or try to smooth inside edges
                call     .write_info
                jmp      .end_var
             .nx32:
                ; remove  redundant  vertices
                movzx    ebx,.tolerancy_flag
                inc      ebx
                shl      ebx,1
                ; mov      ecx,ebx
                ; shl      ebx,cl
                cvtsi2ss xmm0,ebx
                movd     eax,xmm0
                movzx    edx,.disp_fac_flag
                mov      ecx,edx
                add      ecx,ecx
                shl      edx,cl
                cvtsi2ss xmm0,edx
                mulps    xmm0,[eps2]           ; 0.000002
                shufps   xmm0,xmm0,11000000b   ; normal vector tolerancy
                movzx    edx,.Z_care_flag
                mov      ebx,11b
                mov      ecx,111b

                or       edx,edx
                cmovne   ebx,ecx       ; button to non chck if in teslate are to do!!
                xor      ecx,ecx
                cmp      dl,2  ;non destroy manif chunks struct?
                jne      @f
                bts      ecx,3
               @@:
         ;       cmp      dl,1
         ;       jne      @f
         ;       bts      ecx,2           ; dont mind about chunks
         ;      @@:

                ; bts    ecx,0           ; check if in teslate area
                ; bts    ecx,1           ; chck normal vect of vert
                ; bts    ecx,2           ; one chunk merging - edx chu No
                ; bts    ecx,3           ; non desty manif chunjs struct
                ; xorps  xmm2,xmm2       ; if xmm2 = 0 -> process points_rotated
                call     remove_redundant_vert_ch
                call     .prepare_object
                jmp      .end_var

             .nx26:
                ; remove unused vertices
                mov      eax,.triangles_ptr
                mov      ebx,.points_r_ptr
                mov      ecx,.triangles_count_var
                mov      edx,.points_count_var
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                call     .remove_non_tri
                call     .prepare_object
                jmp      .end_var
             .nx31:
              ;  mfree    [sketch_buff_ptr]
              ;  xor      eax,eax
              ;  mov      [sketch_buff_ptr],eax
                ; edit  next mesh
                call     .write_info
                call     NextMdraw_all
                invoke   SendMessage,[hwnd],WM_PAINT ,0,0
           ;     cmp      [NextMed_flag],5
           ;     jne       .end
           ;     mov      [points_count_var],0
           ;     mov      [triangles_count_var],0
           ;     mfree    [triangles_ptr]
           ;     mfree    [points_r_ptr]
           ;     mov      eax,xres_var
           ;     movzx    ebx,word[eax]
           ;     movzx    ecx,word[eax+2]
           ;     ; one bit per pix
           ;     imul     ebx,ecx
           ;     push     ebx
           ;     shr      ebx,3
           ;     push     ebx
           ;     add      ebx,16
           ;     malloc   ebx
           ;     mov      [sketch_buff_ptr],eax
           ;     mov      edi,eax
           ;     xor      eax,eax
           ;     pop      ecx
           ;     inc      ecx
           ;     shr      ecx,2
           ;     cld
           ;     rep      stosd
                jmp      .end_var
             .nx3:
                ; zoom in
                mov      esi,scale_plus
                jmp      .scale
             .nx4:
                ; zoom out
                mov      esi,scale_minus
             .scale:
                mov      edi,scale
                movlps   xmm0,[edi]
                mulss    xmm0,[esi]
                movss    [edi],xmm0
                jmp      .end_var
             .nx5:
                ; tex map
                ; bump, texture - spherical or planar mapping
                mov      eax,.tex_scale
                call     .calc_bumpmap_coords
                jmp      .end_var
             .nx6:
                ; random directional lights
                call     make_random_lights
                call     normalize_all_light_vectors
                call     init_point_lights
                call     do_color_buffer
                call     init_envmap2
                jmp      .end_var
             .nx30:
                ; long pipe derives
                ; init work, depend segs cnt.
                cmp      .to_piec_flag,0
                jne      @f
                call     do_active_object
                jmp      .end_var
             @@:
                call     to_pieces
                mov      eax,'notp'   ; not free tp
                call     free_mem_for_tp
                xor      eax,eax      ; no alloc for tp
                call     alloc_mem_for_tp
                call     calc_combo
                jmp      .end_var
             .nx28:
                ; long pipe derivesv
                ; init work, depend segs cnt.
                call     long_pipe_init
                jmp      .end_var
                ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
             .nx27:
                mov      ebx,LPIP_F ; flag to stay intact
                call     zero_flags
                mov      al,.lpipe_flag
                cmp      al,0
                je       .act
                cmp      al,1
                je       @f
                cmp      al,1
                jae      .lp_flg2
                xor      ebx,ebx
                mov      [NextPointsCount],ebx
                mov      [NextTrianglesCount],ebx
             @@:
                jmp      .end_var
             .lp_flg2:
          ;      mov      ebx,LPIP_F ; flag to stay intact
          ;      call     zero_flags
                mov      eax,'frea'
                call     free_mem_for_tp
                mov      [triangles_count_var],50000   ; always 20000 elements as minimal
                mov      [points_count_var],50000
                mov      eax,'alla'
                call     alloc_mem_for_tp
                ; calculate long pipe
                call     call_l_pipe
                call     .write_info
                call     draw_menu
                jmp      .end_var
             .act:
                xor      ebx,ebx         ; next join hint = off
                call     do_active_object
                xor      eax,eax
                call     draw_menu
                jmp      .end_var
                ;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
           ;  .nx60:
           ;     mov      ebx,FFD_F ; flag to stay intact
           ;     call     zero_flags
           ;     jmp      .end_var
             .nx17:
                mov      ebx,EDIT_F or CHUN_F ; flags to stay intact
                call     zero_flags
                jmp      .end_var
             .nx7:
                ; displaying models
                cls
                call     do_color_buffer
                jmp      .end_var
             .nx8:
                ; bumps  random/texture dependent
                call     calc_bumpmap
                jmp      .end_var
             .nx9:
                ; bumps  random/texture dependent
                movzx    eax,[tex_flag]
                or       eax,eax
                jne      @f
                call     generate_texture2    ; draw stripes
                jmp      .tex_end
             @@:
                cmp      eax,1
                jne      @f
             ;   mov      esi,jpeg
             ;   mov      edi,texmap
             ;   call     reconstruction
                call     generate_texture3    ; draw xor
                jmp      .tex_end
             @@:
                cmp      eax,2
                jne      @f
                ; mov      eax,tex_scale1
                movss    xmm0,.tex_scale
                ; call    do_fog
                call     do_mandel_tex
                jmp      .tex_end
             @@:
                call     generate_texture1    ; white area
             .tex_end:
                mov      edi,bump_map
                call     calc_bumpmap
                call     init_point_lights
                jmp      .end_var
             .nx11:   ; process save

                ; do counter work
                mov      edi,object_cnt
                mov      esi,save_flag
                movzx    ebx,byte[esi]
                imul     ebx,12
                mov      eax,Bfile_name + 6
                movzx    edx,byte[edi]
                add      eax,ebx
                mov      [eax],dl

                inc      edx
                mov      eax,'A'
                cmp      edx,'Z'
                cmove    edx,eax
                mov      [edi],dl
                movzx    edx,byte[esi]
                shl      edx,2
                add      edx,save_procs
                ; or     eax,eax
                ; jnz    @f
                ; push   [triangles_count_var]    ; pushes prevent crash in
                ; push   [points_count_var]       ; above 65535 vertices or tris
                ; call   dword[eax]               ; situation
                ; pop    [points_count_var]
                ; pop    [triangles_count_var]
                ; jmp    .end_var
             @@:
                mov      eax,20    ; info for *.wav sav proc = 16 bit sample
                                   ; audio laudness
                xor      ebx,ebx   ; info for *.wav = no mem adress
                call     dword[edx]
                jmp      .end_var
             .nx13:
                ; do displacement
                pushad
                call     apply_displacement_from_tex
                popad
                call     .prepare_object
                jmp      .end_var
             @@:
             .nx14:
                ; do teslation
                xor      eax,eax
                xor      ebx,ebx
                call     triangulize_all_faces ;triangulize_faces
                ; out:   esi - new triangles ptr
                ;        edi - new vertices ptr
                ;        ecx - new vertices count
                ;        ebx - new tris count
                cmp      .tes_mod_flag,0  ; check if teslate area ?
                jne      .not_area        ; if zero - not 0 t, 1 - w
                cmp      .td_wp_flag,1    ; positive Z only  ?
                je       .skpp
             .not_area:
                push     esi edi ebx
                mov      [points_count_var],ecx
                mfree    [points_r_ptr]
                mfree    [triangles_ptr]
                pop      [triangles_count_var] [points_r_ptr]  [triangles_ptr]
                call     .calc_combo
                jmp      .end_var
             .skpp:
                push     esi edi ebx
                mov      edx,ecx
                mov      ecx,ebx
                mov      eax,esi
                mov      ebx,edi
                ; push     esi edi ecx
                call     .remove_unused_verticesB
                mov      [points_count_var],ecx
                pop      [triangles_count_var] [points_r_ptr]  [triangles_ptr]
                mov      eax,'frea'   ; free all
                call     free_mem_for_tp
                xor      eax,eax      ; no alloc for tp
                call     alloc_mem_for_tp
                mov      eax,'firs'   ; no mem work
                call     .calc_combo
                call     .remove_non_tri
                call     rem_cracks
                jmp      .end_var
             .nx15:
                ; curremnt object
                xor      ebx,ebx         ; next join hint = off
                call     do_active_object
                jmp      .end_var
             .nx16:
                ; rando m point light position
                ; and pplane equation
                call     random_point_light_position
                call     normalize_plane_equation
                jmp      .end_var
             .nx19:
                ; trans parent displacement color
                cmp      .disp_col_flag,0
                jne      @f
                mov      [displ_transparent_col],0x00ffffff
             @@:
                cmp      .disp_col_flag,1
                jne      @f
                xor      eax,eax
                mov      [displ_transparent_col],eax
             @@:
                jmp      .end_var
             .nx56:
                ; submit changes
                call     .write_info
                call     NextMdraw_all
                xor      eax,eax
                call     draw_menu
                mov      [do_submit],1
                jmp      .end_var
             .nx21:
                ;--------check if draw bezier surfaces------------------------------
                ;-------------------------------------------------------------------
                mov      eax,1
                mov      ebx,BEZI_F or DERI_F
                ; flags to leave unchanged
                call     zero_flags
                xor      ebx,ebx
                mov      [NextPointsCount],ebx
                mov      [NextTrianglesCount],ebx
                mov      bl,.bezier_flag
                or       bl,bl
                jz       .act1
             .do_bez:
                movzx    ebx,bl
                mov      eax,ebx
                dec      eax
                imul     eax,12
                add      eax,bezier_parameters  ; data.inc
                movups   xmm0,[eax]
                movhlps  xmm1,xmm0
                movlps   [derv_rect_ptr],xmm0
                movss    [bez_patch_count],xmm1
                cmp      bl,5
                je       .tri_bez
             .forw_bez:
                ; always 30000 elements allocation as minimal !!
                mov      [triangles_count_var],30000
                mov      [points_count_var],30000
                mov      eax,'frea'
                call     free_mem_for_tp
                mov      eax,'alla'
                call     alloc_mem_for_tp
                jmp      .end_var
                ;---------------------------------------------------------------------
                ;------------------------end bezier surfaces routines-----------------
                ;---------------------------------------------------------------------
              .tri_bez:
                mov      ebx,tetra_triangles
                ; transform one tri from tri/points list
                ; transform to nodes of bezier patch (only one now)
                mov      edi,bezier3_nodes
                mov      ecx,1
              .transform23bez:
                push     ecx
                push     ebx

                movzx    ecx,byte [ebx]
                imul     ecx,12
                mov      esi,tetra_points_r
                movups   xmm0,[ecx+esi]
                movzx    ecx,byte[ebx+1]
                imul     ecx,12
                movups   xmm1,[ecx+esi]
                movzx    ecx,byte[ebx+2]
                imul     ecx,12
                movups   xmm2,[ecx+esi]

                rcpps    xmm7,[const3]
                movups   [edi],xmm0

                movups   xmm3,xmm0
                subps    xmm3,xmm1
                movaps   xmm4,xmm3
                dpps     xmm3,xmm3,01110111b  ; xm3 = lenght of edge
                sqrtps   xmm3,xmm3
                rcpps    xmm5,xmm3
                mulps    xmm4,xmm5            ; xm4 = normailsed edge vect
                mulps    xmm3,xmm4
                movups   xmm5,xmm0
                mulps    xmm3,xmm7            ; xm3 = "step" value
                subps    xmm5,xmm3
                movups   [edi+12],xmm5
                subps    xmm5,xmm3
                movups   [edi+24],xmm5
                movups   [edi+36],xmm1

                add      edi,48
                movups   xmm3,xmm1
                subps    xmm3,xmm2
                movaps   xmm4,xmm3
                dpps     xmm3,xmm3,01110111b  ; xm3 = lenght of edge
                sqrtps   xmm3,xmm3
                rcpps    xmm5,xmm3
                mulps    xmm4,xmm5            ; xm4 = normailsed edge vect
                mulps    xmm3,xmm4
                movups   xmm5,xmm1
                mulps    xmm3,xmm7            ; xm3 = "step" value
                subps    xmm5,xmm3
                movups   [edi],xmm5
                subps    xmm5,xmm3
                movups   [edi+12],xmm5
                movups   [edi+24],xmm2

                movups   xmm3,xmm2
                subps    xmm3,xmm0
                movaps   xmm4,xmm3
                dpps     xmm3,xmm3,01110111b  ; xm3 = lenght of edge
                sqrtps   xmm3,xmm3
                rcpps    xmm5,xmm3
                mulps    xmm4,xmm5            ; xm4 = normailsed edge vect
                mulps    xmm3,xmm4
                movups   xmm5,xmm2
                mulps    xmm3,xmm7            ; xm3 = "step" value
                subps    xmm5,xmm3
                movups   [edi+36],xmm5
                subps    xmm5,xmm3
                movups   [edi+48],xmm5
                movups   [edi+60],xmm0

                addps    xmm0,xmm1
                addps    xmm0,xmm2
                mulps    xmm0,xmm7
                ; final  central point
                movups   [edi+72],xmm0
                add      edi,84

                pop      ebx
                inc      ebx
                pop      ecx
                dec      ecx
                jnz      .transform23bez
                jmp       .forw_bez
          if 0
             .transform 23bez:
                push     ecx
                push     ebx

                movzx    ecx,byte [ebx]
                imul     ecx,12
                mov      esi,tetra_points_r
                movups   xmm0,[ecx+esi]
                movzx    ecx,byte[ebx+1]
                imul     ecx,12
                movups   xmm1,[ecx+esi]
                movzx    ecx,byte[ebx+2]
                imul     ecx,12
                movups   xmm2,[ecx+esi]
                movaps   xmm7,[constrecip3]
                movups   [edi],xmm0

                movaps   xmm4,xmm0
                movaps   xmm3,xmm0
                subps    xmm3,xmm1            ; ed 0 1
                mulps    xmm3,xmm7
                addps    xmm4,xmm3
                movups   [edi+12],xmm4
                addps    xmm4,xmm3
                movups   [edi+24],xmm4
                movups   [edi+36],xmm1

                movaps   xmm4,xmm1
                movaps   xmm3,xmm1
                subps    xmm3,xmm2            ; ed 0 1
                mulps    xmm3,xmm7
                addps    xmm4,xmm3
                movups   [edi+48],xmm4
                addps    xmm4,xmm3
                movups   [edi+60],xmm4
                movups   [edi+72],xmm2

                movaps   xmm4,xmm0
                movaps   xmm3,xmm0
                subps    xmm3,xmm2            ; ed 0 1
                mulps    xmm3,xmm7
                addps    xmm4,xmm3
                movups   [edi+84],xmm4
                addps    xmm4,xmm3
                movups   [edi+96],xmm4

                addps    xmm0,xmm1
                addps    xmm0,xmm2
                mulps    xmm0,xmm7
                movups   [edi+108],xmm0
                add      edi,120

                pop      ebx
                add      ebx,1
                pop      ecx
                dec      ecx
                jnz      .transform23bez
                ; we have 10 nodes
                ; one tri Bezier patch
                jmp      .forw_bez
          end if
             .act1:
                or       eax,-1
                mov      [edit_particle_no],eax
                mov      [ed_bpatch.derive_no],ax
                xor      ebx,ebx         ; next join hint = off
                call     do_active_object
                jmp      .end_var
             .nx50:
                mov      ebx,CHUN_F or SHCH_F or EDIT_F
                ; flags to stay intact
                mov     edi,'nf' ; non free inner verts mem
                call     zero_flags
                jmp      .end_var
             .nx57:
                cmp      .ffd_flag,0
                je       @f
                mov      ebx,FFD_F ; flag to stay intact
                call     zero_flags
             @@:
                cmp      .ffd_flag,3
                jne      @f
                mov      [Def_Derv_flag],1
             @@:
                mov      edi,ffd_nodes
                mov      esi,ffd_nodes_intial
                mov      ecx,56*3
                cld
                rep      movsd
                lea      esi,[NextMsub]
                lea      edi,[NextMxadd]
                mov      ecx,17
                rep      movsw
                call     .write_info
                call     NextMdraw_all
                xor      eax,eax
                call     draw_menu
                jmp      .end_var
             .nx64:
                push     [triangles_ptr]
                call     from_tex
                pop      eax
                cmp      eax,[triangles_ptr]
                je       .end
                mov      ebx,11b
                mov      ecx,10000b     ; from tex mark
                mov      eax,0.0035     ; this value sholud be
                ; image/object size dependent !!
                ; Thinka about improve !!
                call     remove_redundant_vert_ch    ; merge verts
                call     remove_non_tri
                call     .calc_combo
                call     .write_info
                or       eax,-1
                call     draw_menu
                jmp      .end_var
             .nx62:
                call     do_deformation_normals_depend
                call     .calc_combo
                call     .write_info
                or       eax,-1
                call     draw_menu
                jmp      .end_var
             .nx61:
                movzx    eax,.rph_bump_flag
                cvtsi2ss xmm1,eax
                movlps   xmm0,[norm_dev2]
                shufps   xmm1,xmm1,0
                mulps    xmm0,[f3]
                mulps    xmm0,xmm1
                movlps   [norm_dev],xmm0
                jmp      .end_var
             .nx20:
                movzx    eax,.disp_fac_flag
                mov      ecx,eax
                add      eax,2
                shl      eax,2
                ; long pipe derives
                ; init work, depend segs cnt.
                mov      [long_pipe_one_seg_c],eax
                ; displacement factor
                inc      ecx
                mov      eax,3400
                shl      eax,cl
                mov      [displac_div],eax
                jmp      .end_var
              .nx22:
                cmp      .lpipe_flag,0
                je       @f
                call     call_l_pipe
              @@:
             .end:
                mov      [menu_button_number],0
                mov      esp,ebp
                pop      ebp
ret
;******************************************************
;*****************                      ***************
;*****************  Various procedures  ***************
;*****************                      ***************
;******************************************************
; some long pipe work
pipe_a:
                push       ebp
                mov        ebp,esp
                sub        esp,37

               .cnt_drv    equ dword [ebp-4]
               .src        equ [ebp-8]
               .screen     equ [ebp-12]
               .yres       equ [ebp-16]
               .xres       equ [ebp-20]
               .part_no    equ [ebp-24]
               .src2       equ [ebp-28]
               .Def_Derv_verts_base equ [ebp-32]
               .Def_Derv_base       equ [ebp-36]
               .ffd_flag            equ byte[ebp-37]

                mov        edx,xres_var
                movzx      eax,word[edx]
                movzx      ebx,word[edx+2]
                mov        edx,[screen_ptr]
                mov        .cnt_drv,ecx
                mov        .src,esi
                mov        .src2,esi
                mov        .xres,eax
                mov        .yres,ebx
                mov        .screen,edx
                movzx      eax,[ed_bpatch.derive_no]
                mov        bl,[ffd_flag]
                mov        .part_no,eax
                mov        .Def_Derv_verts_base,dword Def_Derv_verts_base
                ; spline deformation base of calculated verts
                mov        .Def_Derv_base, dword Def_Derv_base
                ; spline deformation derives base
                mov        .ffd_flag,bl
                cmp        bl,3
                je         .ffd3
                mov        ecx,[edit_particle_no]
                mov        .part_no,ecx

                mov        ecx,[long_pipe_segs_c]
                lea        ecx,[ecx*3]
                mov        .cnt_drv,ecx
                mov        esi,long_pipe_derv_rotated
                mov        .src,esi
                call       rotate_long_pipe_derv  ; rotate derives, scale
                or         edx,-1                 ; and translate on vector
                mov        eax,.part_no
                cmp        eax,edx
                je         @f
                movss      xmm1,dword[edit_end_x]
                movss      xmm0,dword[edit_start_x]
                xorps      xmm2,xmm2
                punpcklwd  xmm0,xmm2
                punpcklwd  xmm1,xmm2
                psubd      xmm1,xmm0
                cvtdq2ps   xmm1,xmm1
                dec        eax
                imul       eax,12
                add        eax,.src ;long_pipe_derv_rotated
                movups     xmm0,[eax]
                addps      xmm0,xmm1
                movlps     [eax],xmm0
                movhlps    xmm0,xmm0
                movss      [eax+8],xmm0
             @@:
             .ffd3:
                xor        ebx,ebx
                ; ebx = marker if deformation splines based cause..
             .calc_spli_base:
                push       ebx
                mov        edx,[long_pipe_vert_ptr]  ; i store verts spline deform
                mov        .src2,edx                 ; vert here (long_pipe_vert), to multi purpose buff
                mov        ecx,.src
                cmp        ebx,1
                cmove      ecx,.Def_Derv_base
                cmp        ebx,1
                cmove      edx,.Def_Derv_verts_base
                mov        eax,[long_pipe_one_seg_c]
                mov        ebx,[long_pipe_segs_c]    ; segments count
                cmp        [lpcurve_tp],0
                je         @f
                lea        ebx,[ebx*3]
             @@:
                mov        eax,[long_pipe_one_seg_c]
                push       eax
                push       ebx
                lea        ebx,[ebx*3]
                movzx      esi,[lpcurve_tp]
                call       calc_long_curve
                ; eax - curve one segment steps count, must be divisable by 4
                ; ebx - curve segments count
                ; ecx - curve derives ptr, Bspline - must end redundand
                ;        first two derives vertices (closed curve)
                ; edx - curve_vertices ptr, must be allocated enough memory
                ; Bspline / Bezier / Catmull Rom / Hermite curves
                pop        ebx
                pop        eax
                imul       eax,ebx
                pop        ebx
                cmp        .ffd_flag,3
                jne        @f
                inc        ebx
                cmp        ebx,1
                je         .calc_spli_base
                ; jmp        .no_white_dots
              @@:
          if 0
                mov        [points_count_var],eax
                mov        ecx,eax
                mov        [triangles_count_var],3
                xor        eax,eax
                ; eax = maarker if deformation splines based cause..
        ;    .draw_bas_vertices:
                push       eax
                push       ecx
                mov        esi,.src2
                cmp        eax,1
                cmove      esi,.Def_Derv_verts_base
                mov        edx,.xres
                xorps      xmm3,xmm3
                movlps     xmm4,.xres
                mov        eax,3
                movd       xmm5,eax
                shufps     xmm5,xmm5,0
                paddd      xmm3,xmm5
                psubd      xmm4,xmm5
             @@:
             .again_vertex:
                movups     xmm0,[esi]
                cvtps2dq   xmm0,xmm0
                movaps     xmm7,xmm0
                movaps     xmm6,xmm0
                pcmpgtd    xmm7,xmm4
                pcmpgtd    xmm6,xmm3
                xorps      xmm7,xmm6
                pmovmskb   eax,xmm7
                cmp        al,-1
                jne        @f
                pcmpeqd    xmm7,xmm7
                sub        esp,8
                movlps     [esp],xmm0
                pop        eax ebx
                imul       ebx,edx
                add        ebx,eax
                shl        ebx,2
                add        ebx,.screen
                movlps     [ebx],xmm7
                lea        ebx,[ebx+edx*4]
                movlps     [ebx],xmm7
             @@:                          ;  skip vertex
                add        esi,12
                loop       .again_vertex
                ; pop      ecx
                ; pop      eax
                ; inc      eax
                ; cmp      .ffd_flag,3
                ; jne      @f
                ; cmp      eax,1
                ; je       .draw_bas_vertices
                ; in esi -- derives vertices, each derive 4 vertices,
                ;           each derive vertex 3xdword float
                ;    ecx -- derives count
                ;    edi -- screen buffer ptr
                ;    eax -- screen width
                ; derives  list must be scaled and translated into screen coords
             @@:
         end if
                ; .no_white_dots:
                mov        esi,.src
                mov        ecx,.cnt_drv
                cmp        [ffd_flag],3
                je         @f
                cmp        [lpcurve_tp],0
                je         @f
                add        ecx,2
              @@:
                push       ecx
                mov        edi,.screen
                mov        eax,.xres
                call       draw_long_pipe_derv_lines
                pop        ecx
                mov        esi,.src
                mov        eax,.xres
                mov        ebx,.yres
                mov        edx,.screen
                mov        edi,[edit_buf_ptr]
                call       write_edit_bars_vert  ; write handlers to derives
             .en:
                mov        esp,ebp
                pop        ebp
                ret
;===============================================================
;===============================================================
prompt_proc:
; some text info on top bar
                push   ebp
                mov    ebp,esp
                pushad
                ; push   dword[ebp+8]
                xor    eax,eax          ; clear draw buffer
                mov    edi,prompt_screen
                mov    ecx,PROMPTX * PROMPTY
                cld
                rep    stosd
                ; eax - x * 65536 + y -> where to display
                ; ebx - pointer to character table
                ; ecx - size x * 65536 + size y -> size of scr buffer
                ; esi - pointer to ASCIIZ string to display
                ; edi - pointer to scr buffer
                mov    eax,5 shl 16 + 5
                mov    ebx,font_table + 62 ;  /fonttab2 + 62
                mov    ecx,PROMPTX shl 16 + PROMPTY
                ; pop    esi
                mov    esi,[ebp+8]
                mov    edi,prompt_screen
                mov    edx,16   ; / 9
                call   write_text
                pop_abi_regs
                ; invoke StretchDIBits,[hdc],1,1,PROMPTX,PROMPTY,0,0,PROMPTX,PROMPTY,prompt_screen,Pbmi,0,SRCCOPY
                invoke SendMessage,[hwnd],WM_PAINT ,0,0
                push_abi_regs
                popad
                mov    esp,ebp
                pop    ebp
                ret    4
;==============================================================
; rendering procedure commands
rend_com:
                push      ebx edi esi
                push      ebp
                mov       ebp,esp
                sub       esp,60
                .scr      equ      [ebp-4]      ; \
                .xres     equ      [ebp-8]      ;   > not xchg order
                .yres     equ      [ebp-12]     ; /
                .nextM    equ dword[ebp-16]

                mov       esi,[screen_ptr]
                mov       edx,xres_var
                movzx     ebx,word[edx]
                movzx     ecx,word[edx+2]
                mov       .scr, esi
                mov       .xres, ebx
                mov       .yres, ecx
                or        eax,-1
                mov       edi,[edit_buf_ptr]
                mov       ecx,.yres
                imul      ecx,.xres
                cld
                rep       stosd

                cmp       [lpipe_flag],1  ;   long pipe cause
                jne       .no_pipe_edit
                xor       eax,eax
                mov       edi,.scr
                mov       ecx,.yres
                imul      ecx,.xres
                cld
                rep       stosd

                ; mov       ecx,3           ; ffd spli derv nodes count
                call      pipe_a
                jmp       .no_draw
             .no_pipe_edit:

                cmp       [wave_flag],0
                je        @f
                xor       eax,eax
                mov       edi,.scr
                mov       ecx,.yres
                imul      ecx,.xres
                cld
                rep       stosd
                ; capture shape buff ?
                ; one bit wave buff ?
                ; float/fixed words wave shape list?
                mov       edx,[wave_ptr]
                mov       eax,[wave_end_ptr]
                movups    xmm0,.yres
                call      draw_wave
                jmp       .draw_image
              @@:
                ; cmp       [bezier_flag],0
                ; je        .no_bz
                ; call      bezier
                ; .no_bz:
                stdcall   g_draw
                call      write_info

             .no_draw:
                cmp       [edit_flag],0  ; edition - vert, face and edge
                je        .skip_ed
                call      write_edit_bars
             .skip_ed:
                movzx     eax,[ccounter_inc]
                ; if custom rotate
                mov       esi,main_rotary
                cmp       [lpipe_flag],0
                jne       .custom
                mov       cl,[rotary_flag]
                cmp       cl,3
                je        .custom
                xor       ebx,ebx
                cmp       [edit_flag],0
                cmovne    eax,ebx
                shl       cl,3
                shl       eax,cl
                xorps     xmm6,xmm6
                movd      xmm7,eax
                punpcklbw xmm7,xmm6
                punpcklwd xmm7,xmm6
                jmp       .end_rot_inc
             .custom:     ; custom rotate
                push      [custom_obj_angle]
                pop       eax
                and       eax,0x00ff00ff
                mov       ebx,eax
                shr       ebx,16
                mov       dword[esi],ebx
                mov       dword[esi+4],eax
                xorps     xmm7,xmm7
             .end_rot_inc:
                pcmpeqd   xmm0,xmm0
                movups    xmm1,[esi]
                paddd     xmm1,xmm7
                psrld     xmm0,24
                andps     xmm1,xmm0
                movhlps   xmm2,xmm1
                movlps    [esi],xmm1
                movss     [esi+8],xmm2
             .draw_image:
                ; mov       eax,.xres
                ; mov       edx,.yres
                pop_abi_regs
                ; cmp       [stencil_s_flag],0
                ; je        @f
                ; mov       ebx,[stencil_shd_A_light_ptr]
                ; mov      .scr,ebx

                ; @@:
                ; stretch_menu
                invoke  SendMessage,[hwnd],WM_PAINT ,0,0
                ; invoke    StretchDIBits,[hdc],0,30,eax,edx,0,0,eax,edx,.scr ,bmi,0,SRCCOPY
                push_abi_regs
                ; invoke  BitBlt,[hdc],0,30,eax,ebx,[screen_ptr],bmi,0,SRCCOPY
                movzx     eax,[draw_flag]
                shl       eax,1
                add       eax,shd_model_addr
                movzx     eax,word[eax]
                add       eax,sh_model
                prompt    eax
                mov       esp,ebp
                pop       ebp
                pop       esi edi ebx
ret
;====================================================================
;====================================================================
do_active_object:
           push       ebp
           mov        ebp,esp
           .mem_hint  equ [ebp-4]   ; m = join/append to existing object.
           .hints     equ [ebp-8]   ; hints = file
           .nxpc      equ [ebp-12]  ; point/verts count
           .nxtc      equ [ebp-16]  ; tris/faces  count
           .nxp_ptr   equ [ebp-20]  ; point/verts list adress
           .nxt_ptr   equ [ebp-24]  ; tris/faces list adress
           .nxp2_ptr  equ [ebp-28]  ; point/verts list sec adress
           .nxt2_ptr  equ [ebp-32]  ; tris/faces list sec adress
           .tp_call   equ dword[ebp-36]   ; tris / points counter proc
           .call_read equ dword[ebp-40]   ; read chosen format proc
           .fil_ptr   equ dword[ebp-44]   ;
           .file_next_ptr equ dword[ebp-48]
           .file_ptr      equ dword[ebp-52]
           .endian        equ      [ebp-53]
           sub     esp,60
           mov     .mem_hint,eax
           mov     .hints,ebx
         ;  mov     .hints,bl
           mov     ebx,File_next_ptr
           mov     eax,[ebx]
           mov     .file_next_ptr,ebx
           mov     esi,[File_ptr]
           ; cmp     bl,'n'
           ; cmove   esi,eax
           mov     .fil_ptr,esi
           or      esi,esi
           jnz     @f
           or      esi,eax
           jz      .load_h
           mov     .fil_ptr,eax
        @@:
           ; switching between objects - seems be near correct.
           ; load new objects as follows: 3ds, asc, ply (binary)
           ; join, append to existing: seems ok.
           cmp     [hrt_flag],2
           je      @f
           mov     eax,'notp'         ; if join = skip tris points lists
           call    free_mem_for_tp    ; release buffers mem
         @@:
           xor     eax,eax
           or      ebx,-1
           call    zero_flags
           xor     eax,eax
           mov     [NextPointsCount],eax ; disable next mesh
           mov     [NextTrianglesCount],eax
           or      ebx,-1
           call    zero_flags            ; zero colided edges and chunks count
           call    write_info
           xor     eax,eax
           call    draw_menu
           mov     eax,hrt_flag
           cmp     byte[eax],1
           jne     .no_hrt
         .load_h:
           call    read_hrt
           jmp     .prep
         .no_hrt:
           cmp     byte[eax],2
           jne     @f
           call    load_tetrahedron
           jmp     .prep
         @@:
           ; mov     edx,.mem_hint
           ; mov     eax,'alla'    ; free all, alloc all
           ; cmp     edx,'firs'    ; - first allocation  ??
           ; cmove   eax,edx
           mov     esi,.fil_ptr
           mov     edx,[esi]
           cmp     dx,'MM'
           je      .3ds
           cmp     dx,'pl'
           je      .ply
           cmp     edx,'FORM'
           je      .lwo
           cmp     dx,'Am'
           je      .asc
           cmp     edx,'IDP2'
           jne     .load_h
         .md2:
           mov     eax,read_md2_tp
           mov     ebx,read_md2
           jmp     .read1
         .lwo:
           mov     eax,read_lwo_tp
           mov     ebx,read_lwo
           jmp     .read1
         .asc:
           mov     eax,read_asc_tp
           mov     ebx,read_asc
           jmp     .read1
         .ply:
           mov     eax,read_ply_tp
           mov     ebx,read_ply
           jmp     .read1
         .3ds:
           mov     eax,read_3ds_tp
           mov     ebx,read_from_file
         .read1:
           mov     .tp_call,eax
           mov     .call_read,ebx
           mov     ebx,[FileSize]
           mov     esi,.fil_ptr
           ; tp count unified call
           ; in:
           ;     esi = ptr to data
           ; out:
           ;     eax = points count
           ;     ebx = tris count
           ;     cl = 'f' if fail
           ;     cl = 'b' if big endian
           call    .tp_call
           mov     .endian,cl
           cmp     cl,'f'   ; if load fail => generate object
           je      .load_h
           mov     [EndFile],edx
           mov     .nxpc,eax
           mov     .nxtc,ebx
           cmp     byte .hints,'m'    ; join/append next
           jne     .no_join
           add     eax,[points_count_var]
           add     eax,12
           imul    eax,12
           malloc  eax
           mov     .nxp_ptr,eax
           mov     esi,[points_r_ptr]
           mov     edi,eax
           mov     ecx,[points_count_var]
           lea     ecx,[ecx*3]
           cld
           rep     movsd
           mov     .nxp2_ptr,edi
           mov     ebx,.nxtc
           add     ebx,[triangles_count_var]
           add     ebx,12
           imul    ebx,12
           malloc  ebx
           mov     .nxt_ptr,eax
           mov     esi,[triangles_ptr]
           mov     edi,eax
           mov     ecx,[triangles_count_var]
           lea     ecx,[ecx*3]
           cld
           rep     movsd
           mov     .nxt2_ptr,edi
           mov     eax,.nxpc
           mov     ebx,.nxtc
           mov     esi,.fil_ptr ;[File_ptr]
           mov     edi,.nxt2_ptr
           mov     edx,.nxp2_ptr
           movlps  xmm0,.endian
           call    .call_read
           mov     [EndFile],ebx
           mfree   [triangles_ptr]
           mfree   [points_r_ptr]
           mov     edi, .nxp2_ptr
           mov     ecx, .nxpc
           call    normalize_object
           mov     ebx,.nxt2_ptr
           mov     eax,[points_count_var]  ; .new_vert_number
           mov     ecx,.nxtc
           mov     edx,.nxpc
           mov     [NextPointsCount],edx
           mov     [NextTrianglesCount],ecx
           lea     ecx,[ecx*3]
          @@:                             ; fix append tri list
           add     dword[ebx],eax
           add     ebx,4
           loop    @b
           mov     edi,.nxt_ptr
           mov     edx,.nxp_ptr
           mov     eax,.nxpc
           mov     ebx,.nxtc
           add     [points_count_var],eax
           add     [triangles_count_var],ebx
           mov     [triangles_ptr],edi
           mov     [points_r_ptr],edx
           xor     eax,eax
           call    alloc_mem_for_tp
           jmp     .prep
         .no_join:
           mov     [points_count_var],eax
           mov     [triangles_count_var],ebx
           ; add     eax,12
           ; imul    eax,12
           ; malloc  eax
           ; mov    .nxp_ptr,eax
           ; add     ebx,12
           ; imul    ebx,12
           ; malloc  ebx
           ; mov    .nxt_ptr,eax

           mov     eax,'alla'
           call    alloc_mem_for_tp
           ; eax = vertices number
           ; ebx = tris number
           ; esi = ptr to data
           ; edi = ptr to tris list
           ; edx = ptr to verts list
;        .read:
           mov     eax,.nxpc
           mov     ebx,.nxtc
           mov     esi,.fil_ptr
           movlps  xmm0,.endian
           ; mov    ecx,esi
           mov     edi,[triangles_ptr]
           mov     edx,[points_r_ptr]
           mov     ecx,[FileSize]
           call    .call_read
           ; cl = 'f' = fail
    ;       mov     [EndFile],ebx
           ; mov     edi,.nxt_ptr
           ; mov     edx,.nxp_ptr
           ; mov     eax,.nxpc
           ; mov     ebx,.nxtc
           ; mov     [points_count_var],eax
           ; mov     [triangles_count_var],ebx
           ; mov     [triangles_ptr],edi
           ; mov     [points_r_ptr],edx
           ; mov     [triangles_ptr],edi
           ; mov     [points_r_ptr],edx
           ; xor     eax,eax
           ; call    alloc_mem_for_tp
     ;     .chg_ptrs:
           ; cmp     byte .hints,'f'
           ; je      .prep
           ; cmp     byte .hints,'n'
           ; jmp     .prep
           ; mov     ebx,File_next_ptr
           ; mfree   [ebx]
           ; xor     edx,edx
           ; mov     eax,File_ptr
           ; mov     ecx,[eax]
           ; mov     [eax],edx
           ; mov     [File_next_ptr],ecx
         .prep:
           mov     ebx,[File_ptr]
           mov     eax,.file_next_ptr
           cmp     [eax],ebx
           je      @f
           mfree   [eax]
         @@:
           mov     ebx,[File_ptr]
           mov     [File_next_ptr],ebx
           mov     eax,'firs' ; ->  no mem work ; .mem_hint
           call    calc_combo
         .end:
        ;   mov     al,2
        ;   call    re_alloc_stenc_shadows
           mov     esp,ebp
           pop     ebp
 ret

;==========================================================
;==========================================================
;==========================================================
zero_flags:     ; Zero some flags.
                ; Some edit features collide each to other.
                ; This proc allow smooth coexisting.
                ; See definitions in data.inc file
                ; ebx = marker with flags related to edit
                ; di = 'nf' -> non free mem last part of prc

                or      edx,-1
                cmp     ebx,edx
                je      .no_zer_flags
                test    ebx,BEZI_F or LPIP_F
                jnz     @f
                ; situations with object to switch
                mov     ah,[lpipe_flag]
                mov     al,[bezier_flag]
                or      ax,ax
                jz      @f
                push    ebx
                mov     bl,'f'
                call    do_active_object
                pop     ebx
             @@:
                mov     esi,zero_flags_list
                xor     edx,edx
                mov     ecx,EDIT_COLL_F_CNT
                ; count of all 'editing' flags
                cld
              .zer:
                lodsw
                cwde
                add     eax,menu_flags
                shr     ebx,1
                jc      @f      ; flag to stay unchanged ?
                mov     [eax],dl
              @@:
                loop    .zer
                cmp     di,'nf'
                je      .end
              .no_zer_flags:
                xor     eax,eax
                mov     [inner_vert_flag],al
                mov     [edges_coll_count],eax
                mov     [mark_coll_ed_flag],al
                mfree   [inner_vert_ptr]
                mfree   [edges_intersect_ptr]
                xor     eax,eax
                mov     [inner_vert_ptr],eax
                mov     [edges_intersect_ptr],eax
              .end:
                xor     eax,eax
                call    draw_menu
                ret
;======================================================================
long_pipe_init: ; calc long pipes control points
                ; segments count depend
                pushad
                movzx ecx,[lpsegs_flag]
                mov   ebx,2
                shl   ebx,cl
                xor   edx,edx
                mov   [long_pipe_segs_c],ebx
                mov   esi,long_pipe_derv_init
                mov   edi,long_pipe_derv+12
                mov   ecx,ebx ;[long_pipe_segs_c]
                lea   ecx,[ecx*3]
                add   ecx,4
                mov   eax,lpipe_derv_size
                mov   [eax],ecx
                call  init_long_pipe_derives
                popad
                ret
;===============================================================
;=================================================================
prepare_object:
                ; prep object to display
             ;   push    ebp
             ;   mov     ebp,esp
             ;   sub     esp,20
             ;   .edg    equ dword[ebp-4]
             ;   .tri_ch equ dword[ebp-8]
             ;   .t_ptr  equ dword[ebp-12]

                mov    edi,[points_r_ptr]
                mov    ecx,[points_count_var]
                call   normalize_object
                call   init_triangles_normals
                call   init_point_normals

                xor    eax,eax
                call   detect_chunks
                mov    [chunks_ptr],ebx
                mov    [chunks_count],ecx
                call   sort_chunks

           ;     xor    eax,eax
           ;     call   do_edges_list   ; find [greatest_chunk]
                call   remove_non_tri
                mov    eax,[triangles_ptr]
                mov    ebx,[points_r_ptr]
                mov    ecx,[triangles_count_var]
                mov    edx,[points_count_var]
                call   remove_unused_vertices
                mov    [points_count_var],ecx
                call   sort_chunks
                ; sort and detect chunks inside
            ;    call   opt_chunks
                ; opt as default
                call   do_edges_list ; do edges two times

                mov    eax,[tex_scale]
                call   calc_bumpmap_coords
             ;   mov    esp,ebp
             ;   pop    ebp
                ret
;=====================================================================
update_flags_after_keypress:
;   in ax - wparam
                pushad
                mov   esi,menu_data
           @@:
                cmp   [esi+menu_data.key1 - menu_data],al
                je    .updat
                cmp   [esi+menu_data.key2 - menu_data],al
                je    .updat
                add   esi,menu_data.op
                cmp   byte[esi],-1
                jnz   @b
                jmp   .end
          .updat:
                movzx eax,[esi+menu_data.butt_no - menu_data]
                movzx edx,byte[eax+menu_flags-1]
                inc   dl
                cmp   dl,[esi+menu_data.max_flag - menu_data]
                jne   @f
                xor   dl,dl
           @@:
                mov   [eax+menu_flags-1],dl
                mov   [menu_button_number],ax
                xor   eax,eax
                call  draw_menu
          .end:
                popad
                ret
;========================================================
rotate_normals:
                pushad
                push      ebp
                mov       ebp,esp
                sub       esp,150
                .nnx_mx   equ [ebp-36]
                .nnx_rot  equ [ebp-44]
                .nsinx    equ [ebp-48]
                .ncosx    equ [ebp-52]
                .nsiny    equ [ebp-56]
                .ncosy    equ [ebp-60]
                .nsinz    equ [ebp-64]
                .ncosz    equ [ebp-68]
                .tn         equ dword[ebp-124]
                .tnr        equ dword[ebp-120]
                .pn         equ dword[ebp-116]
                .pc         equ dword[ebp-112]
                .tc         equ dword[ebp-108]
                .pts_ptr    equ dword[ebp-104]
                .tri_ptr    equ dword[ebp-100]
                .pts_rt_ptr equ dword[ebp-96]
                .pnr        equ dword[ebp-92]
                .edges_ptr  equ dword[ebp-88]
                .nxpc       equ dword[ebp-84]
                .nxtc       equ dword[ebp-80]
                .rotary     equ dword[ebp-76]
                cld
                lea       esi, [triangles_normals_ptr]
                lea       edi,.tn
                mov       ecx,10
                rep       movsd
                mov       .rotary,rotary
                mov       ebx,[NextPointsCount]
                mov       eax,[NextTrianglesCount]
                mov       .nxpc,ebx
                mov       .nxtc,eax
                mov       esi,.pn
                mov       edi,.pnr
                mov       ecx,.pc
                lea       ebx,[matrix]
                call      .rotary
                mov       esi,.tn
                mov       edi,.tnr
                mov       ecx,.tc
                lea       ebx,[matrix]
                call      .rotary
                xor       eax,eax
                cmp       .nxpc,eax   ;.nxpc  ; Next mesh custom scale
                je        .nno_next
                xorps     xmm3,xmm3
                mov       eax,[NextRotat]
                sub       eax,dword  [NextRotatZero]
                movd      xmm7,eax
                punpcklwd xmm7,xmm3
                movlps    .nnx_rot,xmm7
                cld
                lea       esi,.nnx_rot
                lodsd
                mov       ebx,eax
                lodsd
                xor       edx,edx
                lea       edi,.nnx_mx
                call      make_matrixx
                mov       eax,.tc
                mov       ecx,.nxtc
                sub       eax,ecx   ;.nxpc
                lea       eax,[eax*3]
                shl       eax,2
                ; rotatete tris normals
                mov       edi,.tnr  ;[triangles_normals_rotated_ptr]
                add       edi,eax
                mov       esi,edi
                lea       ebx,.nnx_mx
                call      .rotary
                ; next  points normals
                ; next  p. nr. rotated
                mov       edi,.pnr  ;[points_normals_rotated_ptr]
                mov       ecx,.nxpc
                mov       eax,.pc
                sub       eax,.nxpc
                lea       eax,[eax*3]
                shl       eax,2
                add       edi,eax
                mov       esi,edi
                lea       ebx,.nnx_mx
                call      .rotary
          .nno_next:
                mov       esp,ebp
                pop       ebp
                popad
                ret
;*************************************
;*************************************
;***Bezier patches work
;*************************************
;*************************************
bezier:
                .ver_ffd_cnt  equ dword[ebp-4]
                .tri_ffd_cnt  equ dword[ebp-8]
                .cur_nodes    equ dword[ebp-12]
                .mx_ptr       equ dword[ebp-16]
                .bez_cnt      equ dword[ebp-20]
                push      ebp
                mov       ebp,esp
                sub       esp,24
                mov       .cur_nodes,ffd_nodes_rot
                movzx     ecx,[bez_nodes_count]
                mov       esi,[derv_nodes_ptr]
                mov       edi,.cur_nodes
                mov       ebx,matrix_scaled
                call      rotary
                cvtdq2ps  xmm3,[xxadd]
                andps     xmm3,[zero_hgst]
                movzx     ecx,[bez_nodes_count]
                mov       ebx,.cur_nodes
             @@:
                movups    xmm1,[ebx]
                addps     xmm1,xmm3
                movups    [ebx],xmm1
                add       ebx,12
                loop      @b
                cmp       [bezier_flag],5
                jne       .no_tr
                mov       ebx,.cur_nodes
                mov       [points_count_var],2
                mov       edi,[points_r_ptr]
                movzx     ecx,word[bez_patch_count]
             .bez3trans:
                push      ecx
                push      edi
                push      ebx
                mov       eax,30
                ; mov       edi,[points_r_ptr]
                call      calc_bez3_patch
                pop       ebx
                add       ebx,120
                pop       edi
                add       [points_count_var],800
                add       edi,70*12
                pop       ecx
                mov       esi,[points_r_ptr]
                mov       edi,[triangles_ptr]
                mov       ecx,800
                call      mesh ; calc triangles binded verts
                ; loop      .bez3trans
                mov       [triangles_count_var],ecx
                ; mov       eax,[triangles_ptr]
                ; cvtps2dq  xmm0,[muler0123]
                ; movups    [eax],xmm0
                mov       edi, [points_r_ptr]
                mov       ecx, [points_count_var]
                call      normalize_object
                jmp       .end
                ; calc_bez3_patch proc
                ; in:
                ; eax = number of steps
                ; ebx = ten (10) base points ptr,
                ; ordered as follows:
                ; p300, p201, p102, p003, p012, p021, p030
                ; p120, p201
                ; I use equation:
                ; **** EQUATION below ****
                ; y = 1*p300*s^3+p201*s^2*u*3+p102*s*u^2*3+poo3*u^3*1+
                ; + po12*t*u^3 ...
                ; *** EQUATION above *****
                ; in prepare_bez3_factors I change order (see above)
                ; edi = place for calculated vertices                   1

               ; cmp       [bezier_flag],5
               ; je        .end
             .no_tr:
                ; bezier patches verts (and tris)
                movzx     ecx,word[bez_patch_count]
                ; mov       .cur_nodes,ffd_nodes_rot
                mov       edi,[points_rotated_ptr]
                mov       ebx,[triangles_ptr]
                mov       esi,[derv_rect_ptr]
                xor       edx,edx
                mov       .ver_ffd_cnt,edx    ; counters
                mov       .tri_ffd_cnt,edx
                mov       .bez_cnt,ecx
                xor       ecx,ecx
             .Loop_ffd: ;    bezier patches
                push      ecx
                mov       ecx,16
                movd      xmm0,edi
                mov       edi,ffd_nodes_unpck
                cld
             @@:
                xor       eax,eax
                lodsw
                imul      eax,12
                add       eax,.cur_nodes ;ffd_nodes_rot
                xchg      esi,eax
                movsd
                movsd
                movsd
                xchg      eax,esi
                loop      @b
                push      esi
                mov       esi,ffd_nodes_unpck
                mov       edi,nodes_aligned
                mov       ecx,3
             .alig:
                push      ecx
                push      esi
                mov       ecx,16
             @@:
                movsd
                lodsd
                lodsd
                loop      @b
                pop       esi
                pop       ecx
                lodsd     ; esi + 4
                loop      .alig

                mov       esi,nodes_aligned
                movd      edi,xmm0
                mov       edx, 8 shl 16 + 8
                call      bezier_surface   ; one patch
                add       .ver_ffd_cnt,ecx ; 72  ; ecx  ; renew vert cnt
                add       .tri_ffd_cnt,edx ; 120 ; 105 - tri count pre one patch

                mov       ebx,esi
                pop       esi              ; rect ptr
                mov       eax,ecx
                pop       ecx
                push      ebx
                imul      eax,ecx          ; .bez_cnt
                lea       edx,[edx*3]
                inc       edx
             @@:
                add       [ebx],eax
                sub       ebx,4
                dec       edx
                jnz       @b
                pop       ebx
                ; esi --  derives
                inc       ecx
                cmp       ecx,.bez_cnt
                jnz       .Loop_ffd

                push      .ver_ffd_cnt
                pop       [points_count_var]
                push      .tri_ffd_cnt
                pop       [triangles_count_var]
                mov       esi,[points_rotated_ptr]
                mov       edi,[points_r_ptr]
                mov       eax,200.0
                movd      xmm0,eax
                shufps    xmm0,xmm0,11001111b
                mov       ecx,.ver_ffd_cnt ;[points_count_var]
             @@:
                movups    xmm1,[esi]
                addps     xmm1,xmm0
                movups    [esi],xmm1
                movups    [edi],xmm1
                add       edi,12
                add       esi,12
                loop      @b
             .no_bz:

                call      init_triangles_normals
                call      init_point_normals

                xor       eax,eax
                call      detect_chunks
                mov       [chunks_ptr],ebx
                mov       [chunks_count],ecx
                call      sort_chunks
                xor       eax,eax
                call      do_edges_list    ; sort and detect chu inside
                mov       eax,[tex_scale]
                call      calc_bumpmap_coords
                ; mov       al,'h'
                ; xor       ebx,ebx
                ; call      opt_object2
             .end:
            ;    invoke    SendMessage,[hwnd],WM_PAINT ,0,0
                mov       esp,ebp
                pop       ebp
ret
;=================================================
;=================================================
;=================================================
rotate_points:
;in  ebx - scaled rotary matrix ptr
                push   ebp
                mov    ebp,esp
                and    ebp,-16
                sub    ebp,160
                sub    esp,320
                .points_count_var    equ dword[ebp-48]
                .triangles_count_var equ dword[ebp-44]
                .points_r_ptr        equ dword[ebp-40]
                .triangles_ptr       equ dword[ebp-36]
                .points_rotated_ptr  equ dword[ebp-32]
                .pnr                 equ dword[ebp-28]
                .edges_ptr           equ dword[ebp-24]
                .area_counter        equ dword[ebp-8]
                .tri_no              equ dword[ebp-4]
                .mid_vert      equ       [ebp-64]
                .mx_ptr        equ dword [ebp-68]
                .ptr           equ dword [ebp-72]
                .NxBendDerv    equ dword [ebp-76]
                .bez_cnt       equ dword [ebp-80]
                .scale3        equ       [ebp-96]
                .zer_h         equ       [ebp-112]

                .scale_brd     equ       [ebp]
                .NxCrCns       equ       [ebp+16]
                .scale_ben     equ       [ebp+32]
                .vert_cnt      equ       [ebp+48]
                .vert_offs     equ dword [ebp+52]
                .nnx_rot       equ qword [ebp+56]
                .onedtwo       equ       [ebp+64]
                .chu_ptr       equ dword [ebp+104]
                .curr_indexes  equ dword [ebp+108]  ;    .ver_ffd_cnt
                .next_flag     equ byte  [ebp+112]
                .derv_vertNx   equ dword [ebp+116]  ; 48 bytes
                .nnx_mx        equ       [ebp+120]
                .rotary        equ dword [ebp-116]
                .from_area_ptr equ dword [ebp-120]
                .area_indexes  equ dword [ebp-124]
                cld
                lea       esi,[points_count_var]
                lea       edi,.points_count_var
                mov       ecx,7
                rep       movsd
              ;  cmp       .points_count_var,ecx
              ;  jz        .end
                ; mov     .rotary,rotary
                mov       .mx_ptr,ebx              ; init some locals
                movaps    xmm2,[zero_hgst]
                movaps    .zer_h,xmm2
                xorps     xmm1,xmm1
                movss     xmm0,[scale]
                mov       eax,3
                cvtsi2ss  xmm2,eax
                movlps    xmm1,[NextCurvConst]
                shufps    xmm0,xmm0,0 ;11000000b
                shufps    xmm2,xmm2,11110011b
                cvtdq2ps  xmm1,xmm1
                mov       al,[NextMed_flag]
                movaps    .scale_brd,xmm0
                movlps    .NxCrCns,xmm1
                mov       .NxBendDerv,NextBendDerv
                movaps    .scale_ben,xmm2
                mov       .next_flag,al
                cmp       [bezier_flag],0
                je        .no_bz
                call      bezier
                cmp       [bezier_flag],5
                jne       .end
                ;****************************
                ;end Bezier patches fragment
                ;****************************
              .no_bz:
                mov       esi,.points_r_ptr
                mov       edi,.points_rotated_ptr
                mov       ecx,.points_count_var

                cmp       [lpipe_flag],0
                je        .rot
                lea       ecx,[ecx*3]
                shr       ecx,2
                inc       ecx
                movaps    xmm1,.scale_brd
              @@:
                movups    xmm0,[esi]
                mulps     xmm0,xmm1
                movaps    [edi],xmm0
                add       esi,16
                add       edi,16
                loop      @b
                jmp       .after_rot
              .rot:
                mov       ebx,.mx_ptr  ;[matrix_scaled]
                call      rotary
              .after_rot:
                movzx     eax,[NextScale]
                movzx     ebx,[NextScaleZero]
                sub       eax,ebx
                movss     xmm1,[NextScaleXY]
                movss     xmm2,[NextScaleXYZero]
                xorps     xmm0,xmm0
                punpcklwd xmm1,xmm0
                punpcklwd xmm2,xmm0
                psubd     xmm1,xmm2
                cvtdq2ps  xmm1,xmm1
                ; xor       eax,eax
                cmp       .next_flag,0
                jne       @f
                cmp       [ffd_flag],0
                je        @f
                ; xm0, xm3 - mid vert         ; whole ffd
                ; xm6   - scale
                movss     xmm6,[scale]
                xorps     xmm0,xmm0
                xorps     xmm3,xmm3
                shufps    xmm6,xmm6,0
                mov       eax,.points_rotated_ptr
                mov       ecx,.points_count_var
                call      ffd
              @@:
                mov       ecx,200
                cvtsi2ss  xmm3,ecx
                rcpss     xmm3,xmm3
                shufps    xmm3,xmm3,11000000b
                shufps    xmm4,xmm4,11000000b
                cvtsi2ss  xmm2,eax
                shufps    xmm2,xmm2,11000000b
                addps     xmm2,xmm1
                mulps     xmm2,xmm3
                addps     xmm2,[the_one]
                movaps    .scale3,xmm2
                mov       eax,[NextRotat]
                sub       eax,dword [NextRotatZero]
                movd      xmm7,eax
                xorps     xmm6,xmm6
                punpcklwd xmm7,xmm6
                movlps    .nnx_rot,xmm7
                cld
                lea       esi,.nnx_rot
                lodsd
                xchg      ebx,eax
                lodsd
                xor       edx,edx
                lea       edi,.nnx_mx
                call      make_matrixx
             @@:
                ;---------------------------------
                ;------------ /next/ edition work
                ;---------------------------------
                or        ebx,-1
                mov       .tri_no,ebx

                mov       al,.next_flag
                cmp       al,1
                je        .next
                cmp       al,3
                je        .whole
                cmp       al,4
                je        .from_area
             ;   cmp       al,5
             ;   je        .sketch
                cmp       al,2
                jne       .end
                mov       eax,[curr_chunk]
                cmp       eax,ebx
                je        .end
                or        eax,eax
                jz        .end
                cmp       [chunks_count],1
                je        .end
                mov       esi,[chunks_desc_ptr]
                cmp       esi,ebx
                je        .end
                jmp       .chunk
             .from_area:
                xor       edi,edi             ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
             .write_area_list:                ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                push      edi                 ;Next edit - from tess area ;
                cvtdq2ps  xmm3,[xxadd]        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                movups    xmm0,[tri_area_x1]  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                movlhps   xmm3,xmm3
                mov       esi,.points_rotated_ptr
                subps     xmm0,xmm3
                mov       ecx,.points_count_var
                xor       edx,edx
                movhlps   xmm1,xmm0
                mov       .area_counter,edx
                mov       ebx,.from_area_ptr
                cld
             .count:
                movups    xmm2,[esi]
                movaps    xmm3,xmm2
                movaps    xmm4,xmm2
                cmpltps   xmm2,xmm0
                cmpltps   xmm3,xmm1
                xorps     xmm2,xmm3
                movmskps  eax,xmm2
                and       al,11b
                cmp       al,11b
                jne       .no_in_a
                inc       edx
                or        edi,edi
                jz        .no_in_a
                movups    [ebx],xmm4
                add       ebx,12
                push      edi
                mov       edi,.curr_indexes
                mov       eax,.area_counter
                stosd
                mov       .curr_indexes,edi
                pop       edi
             .no_in_a:
                inc       .area_counter
                add       esi,12
                loop      .count
                pop       edi
                inc       edi
                cmp       edi,1
                jne       .skip_malloc
                or        edx,edx
                jz        .end
                mov       ebx,edx
                add       ebx,20
                shl       ebx,2
                malloc    ebx
                mov       .area_indexes,eax
                mov       .curr_indexes,eax
                shl       ebx,2
                malloc    ebx
                mov       .from_area_ptr,eax  ; area verts
                jmp       .write_area_list
             .skip_malloc:
                mov       .vert_cnt,edx
                mov       edi,edx                 ; - vert_count
                mov       edx,.from_area_ptr      ; - offset / address
                jmp       .m_val
             .chunk:
                mov       eax,[curr_chunk]
            ;    inc       eax
                shl       eax,4
                add       eax,esi ;[chunks_desc_ptr]
                cmp       dword[eax+4],0
                je        .end
              @@:
                ; mov eax,[chunks_desc_ptr]
                mov       ecx,[eax]     ; [eax]    - tri  count
                mov       edi,[eax+4]   ; [eax+4]  = edi - vert count in curr chun
                ; [eax+8]  - vert offset
                ; [eax+12] - tri offset
                mov       edx,dword[eax+8]
                imul      edx,12
                add       edx,.points_rotated_ptr
                jmp       .m_val
             .next:
                cmp       al,1
                jne       .whole
                mov       edi,[NextPointsCount]
                or        edi,edi
                jz        .end
                mov       edx,.points_count_var
                sub       edx,edi
                imul      edx,12
                add       edx,.points_rotated_ptr
                jmp       .m_val   ; make values
             .whole:
                cmp       al,3     ; whole object
                jne       @f
                mov       edx,.points_rotated_ptr
                mov       edi,.points_count_var
                mov       ecx,.triangles_count_var
             .m_val:
             @@:
     ;        .sketch:
     ;           cmp       al,5
     ;           jne       @f
             ;   cmp       [sketch_progress],1
             ;   jna       .end
             ;   call      find_v_sketch
     ;           mov       ebx,[sketch_buff_ptr]
     ;           mov       eax,[sketch_verts_indices_ptr]
     ;           mov       edi,[sketch_verts_No]
     ;
     ;           mov       edx,[sketch_verts_ptr]
     ;           or        ebx,ebx
     ;           jz        .end
     ;           mov       .area_indexes,eax
     ;           mov       .from_area_ptr,edx
     ;           or        edi,edi
     ;           jz        .end
             @@:
                mov       .vert_offs,edx
                mov       .vert_cnt,edi
                mov       ecx, edi  ;.vert_cnt
                mov       edi, edx  ;.vert_offs
                mov       eax,'ocen'      ;  only center translate - find max min also !!
                call      normalize_object
                ; xm0, xmm3 = mid vert
                ; xm6       = scale
                ; xm1       = max x, y, z
                andps     xmm0,.zer_h
                movaps    .mid_vert,xmm0
                cmp       [ffd_flag],0
                je        @f
                movaps    xmm3,xmm0    ; mid
                ; xmm6 -  scale
                ; xmm3 -  center
                mov       eax,.vert_offs
                mov       ecx,.vert_cnt
                call      ffd
             @@:
           ;     lea       esi,.nnx_mx         ;esi - pointer to 3x3 matrix
           ;     mov       ebx,scale           ;ebx - ptr to scale
           ;     call      add_scale_to_matrix
                mov       ecx,.vert_cnt
                mov       edi,.vert_offs
                mov       esi,edi
                lea       ebx,.nnx_mx
                call      rotary
          ;   cmp .next_flag ,5
          ;   je  .chA
                mov       edi,.vert_offs
                xor       ecx,ecx
                mov       ecx,.vert_cnt
                rcpps     xmm7,.scale_brd
                mov       eax,[NextCurvConst]
                lea       eax,[eax*2-10]
                cvtsi2ss  xmm6,eax
                ; xmm7 -  brosdcasted scale  reciprocal
             @@:
                movups    xmm0,[edi]

                mulps     xmm0,xmm7
                movaps    xmm2,xmm0
                mulps     xmm2,.NxCrCns
                addps     xmm2,.NxCrCns
                minps     xmm2,xmm6
                xorps     xmm3,xmm3
                cvtps2dq  xmm2,xmm2
                movd      eax,xmm2
                imul      eax,12
                add       eax,.NxBendDerv
                movlps    xmm3,[eax]
                mulps     xmm3,.scale_ben
                addps     xmm0,xmm3
                mulps     xmm0,.scale_brd
                mulps     xmm0,.scale3
                addps     xmm0,.mid_vert
            ;    andps     xmm0,.zer_h
            ;    andps     xmm1,xmm4
            ;    orps      xmm0,xmm1
                movlps    [edi],xmm0
                movhlps   xmm0,xmm0      ;
                movss     [edi+8],xmm0   ; Z coordinate of next/chunks edit
                add       edi,12         ;
                loop      @b
             .chA:
                cmp       .next_flag,4
                jl        .end
                mov       ebx,.from_area_ptr
                mov       esi,.area_indexes
                mov       ecx,.vert_cnt
                cld
             .re_actual:
                lodsd
                imul      eax,12
                add       eax,.points_rotated_ptr
                xchg      esi,ebx
                mov       edi,eax
             ;   movups    xmm0,[esi]
             ;   subps     xmm0,.mid_vert
             ;   movlps    [edi],xmm0
             ;   movhlps   xmm0,xmm0
             ;   movss     [edi],xmm0
                movsd
                movsd
                movsd
             ;    add       esi,12
                xchg      esi,ebx
                loop      .re_actual
                cmp       .next_flag,5
                je        .end
                mfree     .from_area_ptr
                mfree     .area_indexes
             .end:
                add       esp,320
                pop       ebp
ret
;===============================
ffd:
;*******************************
;*******************************
; Free form deformation ...
; Various ways of deformation...
; spline, twist, Bezier patch
; Bezier volume
;*******************************
;*******************************
;*******************************
                push        ebp
                mov         ebp,esp
                sub         esp,350
                and         ebp,-16
                sub         ebp,144
                 ;in:
                 ;       eax -  start vertices list =
                 ;                = vertex start offset
                 ;       ecx -  vertices count to deform
                 ;       xmm6 - scale
                 ;       xmm3 - center
                .sub_fact       equ  [ebp-4]
                .cos            equ  dword [ebp-8]  ;temporaly cover
                .sin            equ  dword [ebp-12] ;scale_brd variable..
                .scale_brd      equ  [ebp-16]
                .onedtwo        equ  [ebp-32]
                .maxxyz         equ  [ebp-48]
                .dest_vert      equ  [ebp-64]
                .ffd_center     equ  [ebp-80]
                .dest_vert_inti equ  [ebp-96]
                .t1234          equ  [ebp-112]
                .proc_call      equ  dword[ebp-116]
                .ffd_flag       equ  byte [ebp-117]
                .marker         equ  byte [ebp-118]
                .start_vert     equ  [ebp-122]
                .vert_cnt       equ  [ebp-126]

                .verts          equ  [ebp]    ; 12 x 4 bytes place
                .verts_b        equ  [ebp+48] ; 12 x 4 bytes place
                .the_factor     equ  [ebp+64]
                .verts_c        equ  [ebp+64]
                .verts_d        equ  [ebp+80]
                .zero_hgst      equ  [ebp+96]
                .max            equ  [ebp+112]
                .min            equ  [ebp+128]


                movaps    xmm4,[zero_hgst]
                movaps    xmm7,[f05x3]
                movaps    .maxxyz,xmm7
                mov       .start_vert,eax
                mov       .vert_cnt,ecx
                lea       ebx,[ffd_scale]
                mov       cl,[ffd_flag]
                movaps    .scale_brd,xmm6
                movaps    [ebx],xmm6
                movaps    [ebx+16],xmm3
                movaps    .ffd_center,xmm3
                movaps    .onedtwo,xmm7
                movaps    .zero_hgst,xmm4
                mov       .ffd_flag,cl
                cmp       cl,4
                je        .twist
                cmp       cl,3
                jne       .no_ffd3       ;no spline ffd
                mov       edi,Def_Derv_flag
                cmp       [edi],byte 0
                je        .next_ffd3     ;next step after init spline ffd
                mov       [edi],byte 0
                mov       esi,eax
                mov       eax,1
                mov       edi,Def_Derv
                ; ecx = vertices count
                ; esi = vertices begin ptr
                ; edi = ptr to result derives
                ; eax = derives count
                call      init_def_derv   ; see long_pipe.inc file
                mov       ecx,2000
                mov       edi,Def_Derv
                movaps    xmm1,.ffd_center
                movss     xmm3,[xres_vard]
                xorps     xmm5,xmm5
                punpcklwd xmm3,xmm5
                cvtdq2ps  xmm3,xmm3
                mulps     xmm3,.onedtwo
             @@:
                movlps    xmm7,[edi]
                addps     xmm7,xmm3
                addps     xmm7,xmm1
                movlps    [edi],xmm7
                add       edi,12
                loop      @b
                mov       esi,Def_Derv
                mov       edi,Def_Derv_copy
                mov       ecx,2000
                cld
                rep       movsd
                mov       esi,Def_Derv
                mov       edi,Def_Derv_base
                mov       ecx,2000
                cld
                rep       movsd
                jmp       .en
             .next_ffd3:
                ; spline deformation work
                xorps     xmm0,xmm0
                movaps    .max,xmm0
                movaps    .min,xmm0
                mov       esi,.start_vert
                mov       ecx,.vert_cnt
             .llopB:                       ; free form modifier
                push      ecx              ; spline based
                push      esi
                movups    xmm0,[esi]
                rcpps     xmm2,.scale_brd
                mulps     xmm2,xmm0
                addps     xmm2,.onedtwo
                movaps    .t1234,xmm2
                mov       ebx,Def_Derv_base
                ; calc_Hermite calling params
                ; in:
                ;      ebx - derives: dword float x1,y1,z1,x2,y2,z2
                ;                                 x3,y3,z3,x4,y4,z4
                ;      esi - aligned t1,t2,t3,t4
                ; out:
                ;      edi - points (vertices) as dword float
                ;           x1,y1,z1 x 4
                ;   y = (a0*y1+a1*m0+a2*m1+a3*y2)
                lea       esi,.t1234
                lea       edi,.verts
                call      calc_Hermite
                mov       ebx,Def_Derv_copy
                lea       esi,.t1234
                lea       edi,.verts_b
                call      calc_Hermite
                lea       eax,.verts
                lea       ebx,.verts_b
                movaps    xmm0,[eax]
                subps     xmm0,[ebx] ;.verts_b
                andps     xmm0,.zero_hgst
                pop       esi
                movups    xmm1,[esi]
                subps     xmm1,xmm0
                movups    [esi],xmm1    
                pop       ecx
                add       esi,12
                loop      .llopB
                jmp       .en
                ; calc_Hermite calling params
                ; in:
                ;      ebx - derives: dword float x1,y1,z1,x2,y2,z2
                ;                                 x3,y3,z3,x4,y4,z4
                ;      esi - aligned t1,t2,t3,t4
                ; out:
                ;      edi - points (vertices) as dword float
                ;           x1,y1,z1 x 16
                ;   y = (a0*y1+a1*m0+a2*m1+a3*y2)
             .twist:
                mov       esi,.start_vert
                mov       ecx,.vert_cnt
                movzx     eax,[disp_fac_flag]    ;word[NextScale]
                inc       eax
                push      eax
                fninit
                fld1
                fdiv      dword .scale_brd   ; calc reciprocal
                fimul     dword [esp]
                fldpi
                fdivp
                fstp      dword .scale_brd   ; .sub_fact
                pop       eax
              @@:
                fld       dword[esi+4]
                fmul      dword .scale_brd  ; .sub_fact
                fsincos
                fstp      .sin
                fst       .cos

                fmul      dword[esi]
                fld       .sin
                fmul      dword[esi+8]
                fchs
                faddp
                fld       dword[esi]
                fxch      st1
                fstp      dword[esi]

                fmul      .sin
                fld       .cos
                fmul      dword[esi+8]
                faddp
                fstp      dword[esi+8]

                add       esi,12
                loop      @b
                jmp       .en
             .no_ffd3:
                mov       ecx,56
                mov       esi,ffd_nodes
                mov       edi,ffd_nodes_rot
                mov       ebx,matrix
                cmp       .ffd_flag,2
                jne       @f
                call      rotary
                mov       ecx,56
                mov       esi,ffd_nodes_intial
                mov       edi,ffd_nodes_intial_rot
                mov       ebx,matrix
                call      rotary
                mov       esi,ffd_nodes_rot
                mov       edi,esi
             @@:
             .ff2:
                movaps    xmm2,.scale_brd
                xor       eax,eax   ; - > MARKER
                ; ffd_nodees_rot -> 1st dest -  ffd pralellpiped destination
                ; ffd_nodees_intial_rot -> sec destination
                movaps    xmm3,.ffd_center
                mov       ecx,1
                movaps    xmm4,.zero_hgst
                cvtsi2ss  xmm7,ecx
                andps     xmm3,xmm4
                pslldq    xmm7,12
                andps     xmm2,xmm4
                orps      xmm2,xmm7  ;[one_hgst_dd1]
             .ag1:
                mov       ecx,56
             @@:
                movups    xmm0,[esi]
                mulps     xmm0,xmm2
                addps     xmm0,xmm3
                movups    [edi],xmm0
                add       esi,12
                add       edi,12
                loop      @b
                mov       esi,ffd_nodes_intial_rot
                mov       edi,esi
                inc       eax
                cmp       eax,1
                je        .ag1
                mov       eax,64
                mov       ecx,16
                cmp       .ffd_flag,2
                cmove     ecx,eax
                xor       edx,edx       ; marker
                mov       .marker,dl
                mov       edx,ffd_nodes_rot
                mov       esi,ffd_rect
                mov       edi,ffd_nodes_unpck
                cld
             @@:
                xor       eax,eax
                lodsw
                imul      eax,12
                add       eax,edx
                xchg      esi,eax
                movsd
                movsd
                movsd
                xchg      eax,esi
                loop      @b
                mov       esi,ffd_rect
                mov       edi,ffd_nodes_unpck_inti
                mov       edx,ffd_nodes_intial_rot
                mov       ecx,64
                inc       .marker
                cmp       .marker,1
                je        @b
                mov       ebx,4
                mov       edx,1
                cmp       .ffd_flag,1
                cmove     ebx,edx
                xor       eax,eax
                mov       .marker,al
                mov       esi,ffd_nodes_unpck
                mov       edi,nodes_aligned    ; minimum 16*16*3 bytes - in Bezier ffd volume cause
             .alig22:
                mov       ecx,3
             .alig2:
                push      ecx
                push      esi
                mov       ecx,16
                imul      ecx,ebx
             @@:
                movsd
                lodsd
                lodsd      ;  add  esi,8
                loop      @b
                pop       esi
                pop       ecx
                lodsd      ;  esi + 4
                loop      .alig2
                mov       esi,ffd_nodes_unpck_inti
                mov       edi,nodes_aligned_inti    ; minimum 16*16*3 byt
                inc       .marker
                cmp       .marker,1
                je        .alig22
                mov       ebx,1
                movaps    xmm7,.onedtwo
                movzx     eax,[tolerancy_flag]
                addps     xmm7,xmm7
                cvtsi2ss  xmm6,eax
                mulps     xmm6,xmm7
                shufps    xmm6,xmm6,0
                movaps    .the_factor,xmm6    ; ffd factor
                mov       eax,bezier_surface
                mov       ebx,bezier_volume
                cmp       .ffd_flag,2
                cmove     eax,ebx
                mov      .proc_call,eax
                mov       ebx,.start_vert  
                mov       ecx,.vert_cnt
                rcpps     xmm0,.scale_brd
                movaps    .scale_brd,xmm0
              ;   fninit
           .llopA:                            ; free form modifier
                push      ecx
                movups    xmm0,[ebx]
                mulps     xmm0,.scale_brd
                mulps     xmm0,.onedtwo
                addps     xmm0,.onedtwo
                ;xm0 0.0 - 1.0
                movaps    .dest_vert,xmm0
                movaps    xmm1,xmm0
                movhlps   xmm2,xmm0
                shufps    xmm1,xmm1,11111101b
                push      ebx
                cmp       .ffd_flag,2
                jne       @f
                mov       esi,nodes_aligned_inti   ;ffd_verts_rot
                lea       edi,.dest_vert_inti
                call      .proc_call          ; bezier_surface
             @@:
                movaps    xmm0,.dest_vert
                movaps    xmm1,xmm0
                movhlps   xmm2,xmm0
                shufps    xmm1,xmm1,11111101b
                mov       esi,nodes_aligned   ; ffd_verts_rot
                mov       eax,'offd'          ; only one vert
                lea       edi,.dest_vert
                call      .proc_call          ; bezier_surface
                movaps    xmm0,.dest_vert
                pop       ebx
                cmp       .ffd_flag,2
                je        @f
                subps     xmm0,.ffd_center

                movlps    [ebx],xmm0
                jmp       .ff1
             @@:
                subps     xmm0,.dest_vert_inti
                mulps     xmm0,.the_factor
                movups    xmm3,[ebx]
                andps     xmm0,.zero_hgst
                addps     xmm3,xmm0
                movups    [ebx],xmm3
             .ff1:
                add       ebx,12
                pop       ecx
                dec       ecx
                jnz       .llopA
            .en:
                add       esp,350
                pop       ebp
ret
;================================================
;================================================
;Rendering loops
peel:
                 pushad
                 push      ebp
                 mov       ebp,esp
                 sub       esp,(280+16)
                 and       ebp,-16
                 sub       ebp,(128+16)
                .x_max                equ dword [ebp-4]
                .x_min                equ dword [ebp-8]
                .y_max                equ dword [ebp-12]
                .y_min                equ       [ebp-16]
                .th_no                equ word  [ebp-20]
                .th_noh               equ dword [ebp-20]
                .ind123               equ       [ebp-48]
                .ind123a              equ       [ebp-44]
                .ind123b              equ       [ebp-40]
                .tex_m                equ       [ebp-64]
                .coord3               equ       [ebp-84]
                .coord2               equ       [ebp-88]
                .coord1               equ       [ebp-92]
                .esi_pee_call         equ dword [ebp-96]

                .dr_fl                equ byte  [ebp-101]
                .cul_flag             equ       [ebp-102]
                .tmap                 equ dword [ebp-106]

                .points_count_var     equ dword [ebp]
                .triangles_count_var  equ dword [ebp+4]
                .points_r_ptr         equ dword [ebp+8]
                .triangles_ptr        equ dword [ebp+12]
                .points_rot           equ dword [ebp+16]
                .pnr                  equ dword [ebp+20]
                .edges_ptr            equ dword [ebp+24]
                .edges_count          equ dword [ebp+28]
                .edge_s_d_ptr         equ dword [ebp+32]
                .screen_ptr           equ dword [ebp+36]
                .Zbuffer_ptr          equ dword [ebp+40]
                .slices_counter_ptr   equ dword [ebp+44]
                .slices_ptrs_buff_ptr equ dword [ebp+48]
                .tex_points_f_ptr     equ dword [ebp+52]
                .tnr                  equ dword [ebp+56]
                .zer_h                equ       [ebp+64]
                .correct_texf         equ       [ebp+80]
                .chunks_ptr           equ dword [ebp+96]
                .line_call            equ       [ebp+100]

                .tex_shift            equ dword [ebp+104]
                .tex_x4               equ dword [ebp+108]
                .tex_size             equ dword [ebp+112]
                mov       dl,[s_tex_flag]  ; small texture
                cmp       dl,1
                je        @f
                mov       .tex_shift,TEX_SHIFT
                mov       .tex_x4,TEX_X * 4
                mov       .tex_size,TEXTURE_SIZE
                jmp       .txb
              @@:
                mov       .tex_shift,TEX_SHIFT_S
                mov       .tex_x4,TEX_X_S * 4
                mov       .tex_size,TEXTURE_SIZE_S
              .txb:

                mov       ecx,1
                mov       eax,16
                mov       .th_noh,ebx
                mov       edi,texmap
                mov       esi,texmap_s

                cmp       dl,1
                cmovne    eax,ecx
                cvtsi2ss  xmm7,eax
                shufps    xmm7,xmm7,0
                rcpps     xmm7,xmm7
                movaps    .tex_m,xmm7
                cmp       dl,1
                cmove     edi,esi
                mov       .tmap,edi
                mov       esi,[chunks_ptr]
                mov       eax,[triangles_normals_rotated_ptr]
                mov       .chunks_ptr,esi
                cld
                movaps    xmm0,[zero_hgst]
                movaps    xmm7,[correct_texf]
                movaps    .zer_h,xmm0
                movaps    .correct_texf,xmm7
                lea       esi,[points_count_var]
                lea       edi,.points_count_var
                mov       ecx,14
                rep       movsd
                ; ecx = 0
             ;   cmp       .points_count_var,ecx
             ;   je        .end
                mov       .tnr,eax
                mov       al,[draw_flag]
                mov       ah,[culling_flag]
                mov       esi,.Zbuffer_ptr
                cmp       al,20
                cmove     esi,.slices_ptrs_buff_ptr
                mov       .cul_flag,ah
                mov       .dr_fl,al
                mov       edx,glass_tex_line
                mov       ebx,glass_tex_line_sl_var  ;.gtsv
                cmp       al,18
                cmove     ebx,edx
                cmp       al,17
                cmove     ebx,edx
                mov       edx,glass_tex_line_sl
                cmp       al,19
                cmove     ebx,edx
                mov       edx,r_phg_bf_line_z
                cmp       al,16
                cmove     ebx,edx
                cmp       al,15
                cmove     ebx,edx
                mov       edx,fur_real_phong_line_z
                cmp       al,21
                cmove     ebx,edx
                mov       edx,horizontal_tex_grd_line
                cmp       al,5
                jne       @f
                mov       ebx,edx
                jmp       .fff
              @@:
                cmp       al,8
                jnbe      .fff
                mov       ebx,bump_tex_line_z
              .fff:
                mov       .line_call,ebx
                mov       .esi_pee_call,esi ; zbuff / slices ptr buff

                xorps     xmm2,xmm2
                movlps    xmm0,[xres_vard]
                movaps    xmm1,xmm0
                psrlw     xmm0,1
                punpckldq xmm0,xmm1
                punpcklwd xmm0,xmm2
                ; xm0 = lo->hi dword x/2, y/2, x, y
                xor       esi,esi  
                mov       ax,.th_no 
                cmp       .dr_fl,8
                jnle      .th4
                cmp       al,0
                jne       @f                
                shufps    xmm0,xmm0,11111001b
                punpckldq xmm2,xmm0
                movaps    xmm0,xmm2            
                ; xm0 = lo->hi dword 0, y/2, 0, x
                jmp       .skip_div
              @@:
                shufps    xmm0,xmm0,10111101b
                ; xm0 = lo->hi dword y/2, y, **, x  
                pinsrw    xmm0,esi,4             
                jmp       .skip_div
              .th4:
                cmp       al,0
                jne       @f
                shufps    xmm0,xmm0,11110001b
                punpckldq xmm2,xmm0
                movaps    xmm0,xmm2
                ; xm0 = lo->hi dword 0, y/2, 0, x/2                            
                jmp       .skip_div
             @@:
                cmp       al,1
                jne       @f
                shufps    xmm0,xmm0,10000100b
                pinsrw    xmm0,esi,0
                ; xm0 = lo->hi dword 0, y/2, x/2, x
                jmp       .skip_div
             @@:
                cmp       al,2
                jne       @f  
                shufps    xmm0,xmm0,00101101b
                pinsrw    xmm0,esi,4
                ; xm0 = lo->hi dword y/2, y, 0, x/2
                jmp       .skip_div
             @@:
              ;  cmp       al,3
              ;  jne       .skip_div
                shufps    xmm0,xmm0,10001101b
                ; xm0 = lo->hi dword y/2, y, x/2, x
             .skip_div:
                movups    .y_min,xmm0
                ; order = ymin ymax xmin xmax
                cmp       [shadow_flag],1
                jne       .skip_shd
                movaps    xmm1,xmm0  ;.y_min
                call      do_shadow
             .skip_shd:
                mov       esi,.Zbuffer_ptr
                mov       edi,stencil_line
                cmp       .dr_fl,17
                je        @f
                cmp       .dr_fl,18
                je        @f
                cmp       .dr_fl,20
                jne       .pee
                mov       esi,.slices_ptrs_buff_ptr
                mov       edi,st_sl_var_line  ;.sli_var
             @@:
                movups    xmm5,.y_min
                mov       eax,'peel'
                call      do_stencil          ; do peel buff - depth of every voxel
            .pee:
                cld
                mov       esi,.triangles_ptr
                xor       ecx,ecx
                cld
            .again_peel_tri:
                push      ecx
                cld
                mov       edi,ecx
                movd      mm7,ecx
                add       ecx,ecx
                add       ecx,.chunks_ptr ; random color
                movd      mm6,[ecx]
                movzx     ecx,word[ecx]
                add       ecx,[ecx+.pee]
                rol       ecx,4
                xchg      cl,ch
                ; imul      ecx,10001
                movd      mm0,ecx
                ; push      edi
                lodsd
                mov       ecx,eax
                lodsd
                mov       edx,eax
                lodsd
                xchg      ecx,eax
                cmp       .cul_flag,byte 1  ; culling
                je        @f                ; correct to peel and
                imul      edi,12            ; and weighted,
                add       edi,.tnr          ; other phong-transparent models
                bt        dword[edi+8],31   ; seems be reversed
                jnc       .cull
             @@:
                push      eax
                push      edx
                push      ecx
                movups    xmm7,[esp]
                pslld     xmm7,2
                imul      eax,12
                imul      edx,12
                imul      ecx,12
                movaps    .ind123,xmm7
                mov       edi,.screen_ptr
                mov       ebx,.pnr
                movups    xmm7,.zer_h
                movups    xmm0,[eax+ebx]
                movups    xmm1,[edx+ebx]
                movups    xmm2,[ecx+ebx]
                andps     xmm0,xmm7
                andps     xmm1,xmm7
                andps     xmm2,xmm7
                mov       ebx,.points_rot  ;.points_rot
                movlps    xmm4,[eax+ebx]
                movhps    xmm4,[edx+ebx]
                movups    xmm6,[ecx+ebx]
                movhlps   xmm7,xmm6
                cvtps2dq  xmm4,xmm4
                cvtps2dq  xmm6,xmm6
                packssdw  xmm4,xmm4
                packssdw  xmm6,xmm6
                movlhps   xmm4,xmm6
                movups    .coord1,xmm4
                movlps    xmm4,[eax+ebx+8]
                movhps    xmm4,[edx+ebx+8]
                shufps    xmm4,xmm4,11111000b  ; z coord work
                movlhps   xmm4,xmm7
                movups    xmm5,.y_min
                mov       ebx,.th_noh
                bt        ebx,30               ; two thread
                jnc       .no_bum
     ;           cmp       .dr_fl,3
     ;           jz        @f
     ;           cmp       .dr_fl,4
     ;           jz        @f
     ;           cmp       .dr_fl,5
     ;           jz        @f
     ;           cmp       .dr_fl,7
     ;           jz        @f
     ;           cmp       .dr_fl,8
     ;           jz        @f
     ;         @@:
                movaps    xmm7,.correct_texf  ; related to:
                mov       ebx,.pnr            ; tex, bump, bump tex
                movhps    xmm2,[eax+ebx]      ; env.
                movlps    xmm2,[ecx+ebx]      ; Models: 3, 4, 5, 7, 8
                movlps    xmm1,[edx+ebx]      ;
                mulps     xmm2,xmm7           ; TODO: grd tex
                addps     xmm2,xmm7
                mulps     xmm1,xmm7
                addps     xmm1,xmm7
                movhlps   xmm0,xmm2
              .no_bum:
              if 0
                mov       ebx,.th_noh
                bt        ebx,26       ; two thread
                jnc       .no_col
                movaps    xmm7,.ind123
                ; mov       eax,.ind123
                ; mov       ecx,.ind123a
                ; mov       edx,.ind123b
                sub       esp,16
                movups    [esp],xmm7
                pop       eax ecx edx ebx
                mov       ebx,.pnr        ; col
                movaps    xmm5,[correct_texf]
                movlps    xmm7,[eax+ebx]
                movhps    xmm7,[ecx+ebx]
                movlps    xmm2,[edx+ebx]
                mulps     xmm7,xmm5
                mulps     xmm2,xmm5
                addps     xmm7,xmm5
                addps     xmm2,xmm5
                mov       edx,color_map
                shufps    xmm7,xmm7,10001101b
                push      edx edx
                cvtps2dq  xmm7,xmm7
                cvtps2dq  xmm2,xmm2
                movlps    xmm5,[esp]
                pslld     xmm7,2
                pslld     xmm2,2
                movhlps   xmm3,xmm7
                pslld     xmm7,TEX_SHIFT
                paddd     xmm7,xmm3
                paddd     xmm7,xmm5
                movlps    [esp],xmm7
                pop       eax  ebx
                movlps    xmm0,[eax]
                movlps    xmm1,[ebx]
                sub       esp,8
                movlps    [esp],xmm2
                pop       eax ebx
                shl       ebx,TEX_SHIFT
                add       eax,ebx
                movlps    xmm2,[eax+edx]
                xorps     xmm3,xmm3
                punpcklbw xmm0,xmm3  ; colors - words in xmm0
                punpcklbw xmm1,xmm3  ; colors - words in xmm0
                punpcklbw xmm2,xmm3  ; and xmm2
                punpcklwd xmm0,xmm3
                punpcklwd xmm1,xmm3
                punpcklwd xmm2,xmm3
                cvtdq2ps  xmm0,xmm0
                cvtdq2ps  xmm1,xmm1
                cvtdq2ps  xmm2,xmm2
              end if
             .no_col:
                pop       ecx
                pop       edx
                pop       eax
                shl       eax,3
                shl       edx,3
                shl       ecx,3
                mov       ebx,.tex_points_f_ptr
                movlps    xmm6,[eax+ebx]
                movhps    xmm6,[edx+ebx]
                movlps    xmm3,[ecx+ebx]
                mulps     xmm6,.tex_m
                mulps     xmm3,.tex_m
                push      esi
                mov       eax,.coord1
                mov       ebx,.coord2
                mov       ecx,.coord3
                ror       eax,16
                ror       ebx,16
                ror       ecx,16
                mov       edx,.tmap
                mov       esi,.esi_pee_call  ;.slc_ptrs
                cmp       .dr_fl,17
                cmove     edx,esi
                movups    xmm7,.line_call
                ; hor line call address and tex params
                push      ebp
                call      glass_tex_tri
                pop       ebp
                pop       esi
             .cull:
                pop       ecx
                inc       ecx
                cmp       ecx,.triangles_count_var
                jne       .again_peel_tri
             .end:
                add       esp,(280+16)
                pop       ebp
                popad
                ret
;=======================================================
include '3glass_tex.inc'
include '2bump_tex.inc'
include '2glass_tex_slices.inc'
include '2glass_tex_slices_var.inc'  ;
include '2fur.inc'
;include '2ray_shd.inc'
include '2grd_tex.inc'
include '2bi_fil.inc'
include 'draw_thrd.inc'
include 'line.inc'
include '2flat.inc'
include '2shd.inc'
include '3stencil.inc'
include '2stencil_slices.inc'
include '2stencil_slices_var.inc'
;=============================================================
;=============================================================
do_stencil:
; in:
;
; xmm5 - max / min values pack
;
; if eax = 'peel' -> peeling buff cause
;    then
;       esi = buffer address, edi = horizontal line proc address
;
; if eax = 'in'   -> inner faces cause
;    then
;       edx = rot points address, ebx, ecx = translate factors addresses
;       xmm1 = pack with variables
;          =  x_var, y_var as words, line_hor proc address(edi), project_buff, z_buff(esi)
;
; if ax = 'iv'    -> inner vertices cause
;       like inner faces cause, except ebx and ecx factors - not importand
; if ax = 'ic'    -> inner vertices of concrete chunk
;       like inner vertices, ebx = chunk number
; 
                push         ebp
                mov          ebp,esp
                sub          esp,64
                .translateB   equ [ebp-4]
                .points_rot   equ [ebp-8]
                .xmax         equ [ebp-16]
                .xmin         equ [ebp-20]
                .ymax         equ [ebp-24]
                .ymin         equ [ebp-28]
                .marker       equ word  [ebp-32]
                .translateA   equ       [ebp-36]
                .stencil_esi  equ dword [ebp-40]   ; buffer
                .project_buff equ       [ebp-44]
                .stencil_hor  equ       [ebp-48]   ; horizontal_line proc address
                .y_res        equ       [ebp-50]
                .x_res        equ       [ebp-52]
                .Zbuffer      equ       [ebp-56]
                .proc_call    equ dword [ebp-60]
                .chu_no       equ       [ebp-64]
             ;   xor       esi,esi
                mov       .chu_no,ebx     ; if ax = 'ic' then ebx = ch no
                movups    .ymin,xmm5
                cmp       [triangles_count_var],0 ;esi
                jz        .end
                mov       .marker,ax
                mov       .translateA,ebx ; A = scale
                mov       .translateB,ecx ; B = translate
                mov       ebx,[Zbuffer_ptr]
                movups    .x_res,xmm1
                mov       .Zbuffer,ebx
                mov       .proc_call, dword stencil_tri
                mov       bl,[draw_flag]
                cmp       ax,'in'    ; - inner faces remove cause
                je        .do_vars2
                cmp       ax,'iv'    ; - inner vert
                je        .do_vars2
                cmp       ax,'ic'    ; - inner verts of concrete chunk
                je        .do_vars2   
                cmp       eax,'peel' ; - peel - I want make
                je        .do_vars   ;   G buffer, with z coord and nor vect
                                     ;   so main call should be to
                                     ;   glass_tex_tri proc
                mov       esi,[slices_ptrs_buff_ptr]
                mov       edi,stencil_line
                cmp       bl,20
                jne       @f
                mov       esi,[slices_counter_ptr]
                mov       edi,stencil_slices_line
             @@:
                cmp       bl,19
                jne       @f
                mov       esi,.Zbuffer
                mov       edi,stencil_slices_line
             @@:
              ;  cmp       bl,100
              ;  cmove     esi,[slices_ptrs_buff_ptr]
             .do_vars:
                cmp       bl,17
                cmove     esi,.Zbuffer
                mov       ebx,[xres_vard]
                mov       .x_res,ebx
                mov       edx,[points_rotated_ptr]
                mov       .stencil_esi,esi
                mov       .stencil_hor,edi   ; horizontal line ptr
             .do_vars2:
                mov       .points_rot,edx
                mov       esi,[triangles_ptr]
                xor       ecx,ecx
             .again_stencil_tri:
                push      edx
                push      ecx
                mov       edi,ecx
                
                movd      xmm7,ecx
                lodsd
                xchg      eax,ecx
                lodsd
                xchg      ebx,eax
                lodsd
                xchg      eax,ecx
                push      esi

                cmp       .marker,'ic'
                jne       @f
                add       edi,edi
                add       edi,[chunks_ptr]
                movzx     edi,word[edi]
                cmp       edi,.chu_no
                jne       .skip
              @@:
                imul      eax,12
                imul      ebx,12
                imul      ecx,12
                mov       esi,[points_normals_rotated_ptr]
                movups    xmm0,[eax+edx]
                movups    xmm1,[ebx+edx]
                movups    xmm2,[ecx+edx]
                cmp       .marker,word 'in'
                jne       @f
                mov       esi,.translateA  ; scale
                mov       edi,.translateB
                mulps     xmm0,[esi] ; .f490x3
                mulps     xmm1,[esi] ; .f490x3
                mulps     xmm2,[esi] ; .f490x3
                addps     xmm0,[edi] ; .f500x3
                addps     xmm1,[edi] ; .f500x3
                addps     xmm2,[edi] ; .f500x3
             @@:
                movhlps   xmm3,xmm0
                movhlps   xmm4,xmm1
                movhlps   xmm5,xmm2
                punpckldq xmm3,xmm4
                movlhps   xmm3,xmm5
                movlhps   xmm0,xmm1
                cvtps2dq  xmm0,xmm0
                cvtps2dq  xmm2,xmm2
                packssdw  xmm0,xmm0
                packssdw  xmm2,xmm2
                movlhps   xmm0,xmm2
                
                sub       esp,16
                movups    [esp],xmm0
           ;     cmp       .marker,'pe'
           ;     jne       @f
           ;     movups    xmm0,[eax+esi] ; attempt fake implementation of Beer law
           ;     movups    xmm1,[ebx+esi] ; nor vects
           ;     movups    xmm2,[ecx+esi]
;----------- xmm0 - 1st normal vector -------------------
;----------- xmm1 - 2cond normal vector -----------------
;----------- xmm2 - 3rd normal vector -------------------
;----------- xmm3 - tex coords tx3, ty3 as --------------
;------------------ float dwords ------------------------
;----------- xmm4 - lo -> hi z1, z2, z3 coords ----------
;-----------  as dwords floats --------------------------
;----------- xmm5 - lo -> hi y_min, y_max, --------------
;-----------  x_min, x_max as dword integers  -----------
;----------- xmm6 - tex 4 dwords lo -> hi ---------------
;-----------  tx1, ty1, tx2, ty2 as floats---------------
;----------- xmm7 - horizontal line proc address --------
            ;    movaps    xmm4,xmm3
            ;    movups    xmm5,.ymin
            ;    pop       eax ebx ecx edx
            ;    jmp       .call
           ;    @@:
      
                pop       eax ebx ecx edx  ; pop edx = fake add esp,4
                ror       eax,16
                ror       ebx,16
                ror       ecx,16
                movups    xmm0,xmm3
                movups    xmm1,.ymin
                mov       edx,.x_res
                ror       edx,16
                mov       edi,.project_buff
                mov       esi,.stencil_esi
                movups    xmm5,.stencil_hor
              .call:
                call      .proc_call
              .skip:  
                pop       esi
                pop       ecx
                pop       edx
                inc       ecx
                cmp       ecx,[triangles_count_var]
                jnz       .again_stencil_tri
             .end:
                mov       esp,ebp
                pop       ebp
ret
;=============================================================
translate_points:
                pushad
                push      ebp
                mov       ebp,esp
                sub       esp,78
                .nxadd                 equ [ebp-78]
                .points_count_var      equ dword[ebp-52]
                .triangles_count_var   equ dword[ebp-48]
                .points_r_ptr          equ dword[ebp-44]
                .triangles_ptr         equ dword[ebp-40]
                .points_rotated_ptr    equ dword[ebp-36]
                .pnr                   equ dword[ebp-32]
                .edges_ptr             equ dword[ebp-28]
                .edges_count           equ dword[ebp-24]
                .edge_s_d_ptr          equ dword[ebp-20]
                .screen_ptr            equ dword[ebp-16]
                .Zbuffer_ptr           equ dword[ebp-12]
                .ptr                   equ [ebp-4]
                cld
                lea       esi,[points_count_var]
                lea       edi,.points_count_var
                mov       ecx,11
                rep       movsd
                mov       al,[bezier_flag]
                cmp       al,5
                je        @f
                or        al,al
                jnz       .end2
              @@:
                xor       esi,esi
                cvtdq2ps  xmm3,[xxadd]
                mov       ebx,.points_rotated_ptr
                mov       ecx,.points_count_var
                sub       ecx,[NextPointsCount]
                jz        .end2
                andps     xmm3,[zero_hgst]
             .add1:
                movups    xmm1,[ebx]
                addps     xmm1,xmm3
                movups    [ebx],xmm1
                add       ebx,12
                loop      .add1
                cmp       [ffd_flag],0
                je        .no_ffd
                or        esi,esi
                cmove     edi,ebx
                ;   cmp       [ffd_flag],1
                ;   jne       @f
                mov       ebx,ffd_nodes_rot
                mov       ecx,56
                inc       esi
                cmp       esi,2
                jne       .add1
                ;    inc      esi
                ;    mov      ebx,ffd_verts_rot
                ;    mov      ecx,[ffd_vert_cnt]  ;56 ;.ver_ffd_cnt
                ;    mov      eax,ffd_nodes_rot
                ;    mov      edx,56
                ;    cmp      esi,2
                ;    cmove    ebx,eax   ;ffd_verts_rot
                ;    cmove    ecx,edx  ;.ver_ffd_cnt
                ;    cmp      esi,3
                ;    jne      .add1
                xchg      ebx,edi
                ;    @@:
             .no_ffd:
                movlps    xmm7,[NextMxadd]
                movlps    xmm5,[NextMsub]
                xorps     xmm6,xmm6
                punpcklwd xmm7,xmm6
                punpcklwd xmm5,xmm6
                psubd     xmm7,xmm5
                pslld     xmm7,1
                movaps    xmm1,[zero_hgst]
                cvtdq2ps  xmm7,xmm7
                andps     xmm7,xmm1
                movups    .nxadd,xmm7
                mov       ecx,[NextPointsCount]
                jecxz     .end_nx
                addps     xmm3,xmm7
                andps     xmm3,xmm1 ;[zero_hgst]
             @@:
                movups    xmm1,[ebx]
                addps     xmm1,xmm3
                movups    [ebx],xmm1
                add       ebx,12
                loop      @b
                jmp       .end2
             .end_nx:

                cmp       [NextMed_flag],2
                jne       .end2
                cmp       [curr_chunk],-1
                je        .end2
       ;         movups    xmm7,.nxadd
       ;         andps     xmm7,[zero_hgst]
       ;         mov       esi,[long_pipe_vert_ptr]
       ;         mov       ecx,4
       ;      @@:
       ;         movups    xmm0,[esi]
       ;         addps     xmm0,xmm3
       ;         movups    [esi],xmm0
       ;         add       esi,12
       ;         loop      @b

                mov       ebx,.triangles_count_var
                add       ebx,200
                malloc    ebx
                mov       .ptr,eax
                mov       edi,eax
                cld
                xor       eax,eax
                mov       ecx,ebx
                sub       ecx,10
                shr       ecx,2
                rep       stosd
                mov       ebx,.triangles_ptr
                mov       ecx,.triangles_count_var
                mov       esi,[chunks_ptr]
                cld
                mov       edx,[curr_chunk]
                movups    xmm7,.nxadd
           ;     andps     xmm7,[zero_hgst]
             .lab1:
                push      ecx
                lodsw
                xor       ecx,ecx
             .lab2:
                push      eax
                cmp       ax,dx
                jne       @f
                mov       eax,[ebx+ecx*4]
                mov       edi,eax
                add       edi,.ptr
                cmp       byte[edi],1
                je        @f
                imul      eax,12
                add       eax,.points_rotated_ptr
                movups    xmm1,[eax]
                addps     xmm1,xmm7
                movups    [eax],xmm1
                mov       byte[edi],1
             @@:
                pop       eax
                inc       ecx
                cmp       ecx,3
                jnz       .lab2
                pop       ecx
                add       ebx,12
                loop      .lab1
                mfree     .ptr

             .end2:
                ; call      calc_shd_verts
                mov       esp,ebp
                pop       ebp
                popad
                ret
;==============================================================
if 0
calc_shd_verts:
                push           ebx esi edi
                push           ebp
                mov            ebp,esp
                sub            esp,200
                and            ebp,-16
                sub            ebp,112
                .p_light_vec   equ [ebp]        ; one light directional vector
                .shd_mx        equ [ebp+32]
                .counter       equ [ebp+100]
                .shd_vertices  equ [ebp-48]
                .zer_h         equ [ebp-64]
                lea       ebx,.shd_mx            ; 64 bytes
                lea       esi,[lights_aligned]   ;.p_light_vec       ; +eax
                call      calc_shadow            ; calculate 4x4 shadow matrix
             .next_shd_vert:
                mov       esi,[points_rotated_ptr]
                mov       ecx,[points_count_var]
                lea       ebx,.shd_mx
                lea       edi,[shadow_vertices_ptr]  ; 16  bytes per one vertex
                add       edi,eax
                call      mul_matrix4x4
                ; thats all ??
                add       esp,200
                mov       ebp,esp
                pop       ebp
                pop       edi esi ebx
                ret
end if
;============================================================================
;============================================================================
;============================================================================
do_shadow:
; in:       xmm1 -> ymin ymax xmin xmax
                pushad
                push     ebp
                mov      ebp,esp
                sub      esp,250
                and      ebp,-16
                sub      ebp,128
                .p_light_vec   equ [ebp]
                .shd_mx        equ [ebp+32]
                .counter       equ [ebp+100]
                .shd_vertices  equ [ebp-48]
                .zer_h         equ [ebp-64]
                .xmax          equ [ebp-68]
                .xmin          equ [ebp-72]
                .ymax          equ [ebp-76]
                .ymin          equ [ebp-80]
                .ptsr          equ dword[ebp-84]
                .pl_coords     equ dword[ebp-88]
                .mul_matrix4x4 equ dword[ebp-92]
                .calc_shadow   equ dword[ebp-96]
                .scr           equ dword[ebp-100]
                .shd_buffe     equ dword[ebp-104]
                .shal          equ [ebp-108]
                mov      eax,[shd_buffer_ptr]
                push     [screen_ptr]
                mov      .shal,dword  shadow_line
                pop      .scr
                mov      .shd_buffe,eax
                push     [points_rotated_ptr]
                pop      .ptsr
                mov      .pl_coords,point_light_coords
                mov      .mul_matrix4x4,mul_matrix4x4
                mov      .calc_shadow,calc_shadow
                movaps   xmm0,[zero_hgst_dd]
                movaps   .ymin,xmm1
                movaps   .zer_h,xmm0   ; some definitions to decrease loop size...
                mov      ecx,[triangles_count_var]
                mov      esi,[triangles_ptr]
             .again_shd_tri:
                push     esi
                push     ecx
                xor      ecx,ecx
             .next_shd_vert:
                push     esi
                mov      .counter,ecx
                mov      eax,ecx
                shl      eax,2
                mov      edx,dword[eax+esi]
                imul     edx,12
                add      edx,.ptsr   ; [edx] - cur vertex
                movups   xmm0,[edx]
                push     edx
                mov      eax,.pl_coords
         ;       andps    xmm0,.zer_h
                subps    xmm0,[eax]
                movaps   xmm1,xmm0
                dpps     xmm1,xmm1,01110111b
          ;      mulps    xmm1,xmm1
          ;      haddps   xmm1,xmm1
          ;      haddps   xmm1,xmm1
          ;      andps    xmm1,.zer_h
                rsqrtps  xmm1,xmm1
                mulps    xmm0,xmm1          ; xmm0 - normalized light vector
                andps    xmm0,.zer_h
                movups   .p_light_vec,xmm0  ;+eax
                lea      ebx,.shd_mx            ; 64 bytes
                lea      esi,.p_light_vec       ; +eax
                call     .calc_shadow            ; calculate 4x4 shadow matrix
                pop      esi
                mov      ecx,1
                mov      eax,.counter
                shl      eax,4
                lea      ebx,.shd_mx
                lea      edi,.shd_vertices  ; 16  bytes per one vertex
                add      edi,eax
                call     .mul_matrix4x4
                pop      esi
                mov      ecx,.counter
                inc      ecx
                cmp      ecx,3
                jnz      .next_shd_vert
                lea      ebx,.shd_vertices
                movlps   xmm0,[ebx]
                movhps   xmm0,[ebx+16]
                movups   xmm1,[ebx+32]
                cvtps2dq xmm0,xmm0
                cvtps2dq xmm1,xmm1
                movaps   xmm3,xmm0
                movhlps  xmm4,xmm0
                movaps   xmm5,xmm1
                movaps   xmm6,.ymin
                shufps   xmm6,xmm6,01110010b ; l->h xmin, ymin, xmax, ymax
                movlhps  xmm3,xmm3
                movlhps  xmm4,xmm4
                movlhps  xmm5,xmm5
                pcmpgtd  xmm3,xmm6
                pcmpgtd  xmm4,xmm6
                pcmpgtd  xmm5,xmm6
                movhlps  xmm6,xmm3
                xorps    xmm3,xmm6
                movhlps  xmm6,xmm4
                xorps    xmm4,xmm6
                movhlps  xmm6,xmm5
                xorps    xmm6,xmm5
                orps     xmm3,xmm4
                orps     xmm3,xmm6
                pmovmskb eax,xmm3
                or       al,al
                jz       .again
                packssdw xmm0,xmm0       ; xm0  lo -> 2 words
                packssdw xmm1,xmm1       ; xm2  lo -> 4 words
                movlhps  xmm0,xmm1
                sub      esp,16
                movups   [esp],xmm0
                pop      ecx ebx eax edx
                ror      eax,16
                ror      ebx,16
                ror      ecx,16
                mov      edx,[xres_vard]
                ror      edx,16
                mov      edi,.scr
                mov      esi,.shd_buffe
                movaps   xmm1,.ymin
                movlps   xmm5,.shal    ; shadow line
                call     stencil_tri   ; triangle_shd
             .again:
                pop      ecx
                pop      esi
                add      esi,12
                dec      ecx
                jnz      .again_shd_tri
                add      esp,250
                pop      ebp
                popad
                ret
;==============================
;==============================
do_lights_shadow_stencil_th:
; 3 stecils shddows buffers according to lights vectors
; ebx - th no
                push        ebp
                mov         ebp,esp
                sub         esp,170
                and         ebp,-16
                sub         ebp,16
                .v1         equ [ebp-4]
                .v2         equ [ebp-8]
                .v4         equ [ebp-12]
                .vecZ       equ [ebp-16]
                .one        equ [ebp-8]
                .crs        equ [ebp-32]
                .cosinus    equ [ebp-36]
                .sinus      equ [ebp-40]
                .light_v_ptr       equ [ebp-44]
                .light_st_buf_ptr  equ [ebp-48]
                .light_mx_ptr      equ [ebp-52]
                .light_alig        equ [ebp-56]
                .mone              equ [ebp-60]
                .translateA        equ [ebp-96]
                .scaleB            equ [ebp-112]
                .stencil_esi       equ dword [ebp-116]   ; buffer
                .project_buff      equ [ebp-120]
                .stencil_hor       equ dword [ebp-124]   ; horizontal_line proc address
                .y_res             equ [ebp-126]
                .x_res             equ [ebp-128]
                .th_no             equ [ebp-132]
                .xmax              equ [ebp+12]
                .xmin              equ [ebp+8]
                .ymax              equ [ebp+4]
                .ymin              equ [ebp]
                mov        .th_no,ebx
                mov        eax,ebx
                shl        eax,6
                add        eax,lights_aligned
                mov        .light_alig,eax
                imul       ebx,44
                add        ebx,shd_stencil_verts_A_ptr
                mov        eax,[ebx]
                mov        .light_v_ptr,eax
                mov        eax,[ebx+4]
                mov        .light_st_buf_ptr,eax
                add        ebx,8
                mov        .light_mx_ptr,ebx
                mov        .stencil_hor,stencil_line
                xor        eax,eax      ; vecZ
                mov        ebx,1.0
                mov        .v1,eax
                mov        .v2,eax
                mov        .v4,eax
                mov        .mone,dword -1.0
                mov        .one,ebx
                lea        eax,.scaleB     ; scale vector
                mov        [eax],ebx
                mov        [eax+4],ebx
                mov        [eax+8],ebx
                mov        [eax+12],ebx
                mov        eax,xres_var
                movzx      ebx,word[eax+2]
                movzx      eax,word[eax]
                mov        .x_res,ax
                mov        .y_res,bx
                mov        .xmax,eax
                mov        .ymax,ebx
                xor        ecx,ecx
                mov        .xmin,ecx
                mov        .ymin,ecx
                imul       eax,ebx
                mov        ecx,eax
                cld
                mov        edi,.light_st_buf_ptr
                mov        eax,60000.1
                rep        stosd
                cvtdq2ps   xmm0,[xxadd]
                movups     .translateA,xmm0  ; translate values
                mov        eax,.light_alig
                movups     xmm0,[eax]
                movups     xmm1,.vecZ
                call       cross_reg
                movups     .crs,xmm0
                ;    lea        edi,.crs
                ;    call       normalize_vector
                movaps     xmm3,xmm0
                ; rotate around .crs vect
                mov        eax,sign_mask  ; see data.inc
                andps      xmm0,[eax+16]  ; [zero_hgst]
                movaps     xmm2,xmm0
                haddps     xmm2,xmm2
                dpps       xmm0,xmm0,01110111b
            ;    mulps      xmm0,xmm0
                haddps     xmm2,xmm2
            ;    haddps     xmm0,xmm0
                andps      xmm2,[eax]     ; get sign
            ;    haddps     xmm0,xmm0
                sqrtps     xmm0,xmm0
                orps       xmm0,xmm2
                movlps     .sinus,xmm0
                rcpps      xmm0,xmm0
                mulps      xmm3,xmm0      ; xm3 = normalized axis vect
                movaps     .crs,xmm3
                ;    lea        edi,.crs
                ;    call       normalize_vector
                ; rotate around vector .crs
                ; dot product is angle cos
                mov        esi,.light_alig
                lea        edi,.vecZ
                call       dot_product
                movss      .cosinus,xmm0
                movaps     xmm2,xmm0
                lea        esi,.sinus
                lea        ebx,.crs
                mov        edi,.light_mx_ptr
                call       make_arbitrary_mx
                mov        esi,[points_rotated_ptr]  ;  only rotated and scaled
                mov        edi,.light_v_ptr
                mov        ecx,[points_count_var]
                mov        ebx,.light_mx_ptr
                call       rotary  ; destroys -> xm's 0, 1, 2, 4, 5, 6
                mov        eax,.light_st_buf_ptr
                mov        .stencil_esi,eax
                mov        edx,.light_v_ptr
                lea        ecx,.translateA
                lea        ebx,.scaleB
                movups     xmm1,.x_res
                movaps     xmm5,.ymin
                mov        ax,'in'
                call       do_stencil
                ; inner faces cause, but scale = 1.0, seems be ok
             .end:
                add        esp,170
                pop        ebp
                ret
;==================================================================
;==================================================================
;==================================================================
;==================BEGIN MAIN RENDERING PROC=======================
;==================================================================
;==================================================================
;==================================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; g_draw - draw to screen buffer; main program proc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
g_draw:
                push        ebx esi edi
                push        ebp
                mov         ebp,esp
                sub         esp,300
                sub         ebp,128+16
                .xres_var       equ word  [ebp-4]
                .yres_var       equ word  [ebp-2]
                .xres_vard      equ dword [ebp-4]
                .scr            equ dword [ebp-8]       ; \
                .zbuff          equ dword [ebp-12]      ;  |  > dont xchg
                .tex_ptr        equ dword [ebp-16]      ;  |    order !!
                .width          equ       [ebp-20]      ; /
                .x_max          equ dword [ebp-24]
                .x_min          equ dword [ebp-28]
                .y_max          equ dword [ebp-32]
                .y_min          equ       [ebp-36]
                .mx             equ       [ebp-74]

                .inner_vert_ptr equ dword [ebp-78]
                .stencil_hor    equ       [ebp-82]
                .stencil_esi    equ       [ebp-86]
                .matrix_scaled  equ dword [ebp-90]
                .rend_opts      equ word  [ebp-92]

                .points_count_var      equ dword[ebp]
                .triangles_count_var   equ dword[ebp+4]
                .points_r_ptr          equ dword[ebp+8]
                .triangles_ptr         equ dword[ebp+12]
                .points_rotated_ptr    equ dword[ebp+16]
                .pnr                   equ dword[ebp+20]
                .edges_ptr             equ dword[ebp+24]
                .edges_count           equ dword[ebp+28]
                .edge_s_d_ptr          equ dword[ebp+32]
                .screen_ptr            equ dword[ebp+36]
                .Zbuffer_ptr           equ dword[ebp+40]
                .slc_cnt_ptr           equ dword[ebp+44]
                .sl_ptrs_b_ptr         equ dword[ebp+48]

                .blur_flag         equ byte [ebp+53]
                                ;  equ byte [ebp+54]
                                ;  equ byte [ebp+55]
                .map_flag          equ byte [ebp+56]
                .rd_flag           equ byte [ebp+57]
                .draw_flag         equ byte [ebp+58]
                .bump_flag         equ byte [ebp+59]
                .tex_flag          equ byte [ebp+60]
                .save_flag         equ byte [ebp+61]
                .process_flag      equ byte [ebp+62]
                .culling_flag      equ byte [ebp+63]
                                ;  equ byte [ebp+64]
                                ;  equ byte [ebp+65]
                .hrt_flag          equ byte [ebp+66]
                .xchg_flag         equ byte [ebp+67]
                .edit_flag         equ byte [ebp+68]
                .set_tri_area_flag equ byte [ebp+69]
                .disp_col_flag     equ byte [ebp+70]
                .disp_fac_flag     equ byte [ebp+71]
                .bezier_flag       equ byte [ebp+72]
                .tolerancy_flag    equ byte [ebp+73]
                .derive_flag       equ byte [ebp+74]
                .td_wp_flag        equ byte [ebp+75]
                .speed_flag        equ byte [ebp+76]
                                ;  equ byte [ebp+77]
                .lpipe_flag        equ byte [ebp+78]
                .lpsegs_flag       equ byte [ebp+79]
                .lpcurve_tp        equ byte [ebp+80]
                .to_piec_flag      equ byte [ebp+81]
                .NextMed_flag      equ byte [ebp+82]
                                ;  equ byte [ebp+83]
                .normals_flag      equ byte [ebp+84]
                .Z_care_flag       equ byte [ebp+85]
                                ;  equ byte [ebp+86]
                .mark_coll_ed_flag equ byte [ebp+87]
                .s_tex_flag        equ byte [ebp+88]
                                ;  equ byte [ebp+89]
                                ;  equ byte [ebp+90]
                .chunks_o_flag     equ byte [ebp+91]
                                 ; equ byte [ebp+92]; crop fr

                .stencil_s_flag    equ byte [ebp+93]
                                 ; equ byte [ebp+94]
                                 ; equ byte [ebp+95]
                .inner_vert_flag   equ byte [ebp+96]
                                 ; equ byte [ebp+97]
                                 ; equ byte [ebp+98]
                                 ; equ byte [ebp+99]
                .tes_mod_flag      equ byte [ebp+100]
                .show_ch_flag      equ byte [ebp+101]
                                 ; equ byte [ebp+102]
                                 ; equ byte [ebp+103]
                                 ; equ byte [ebp+104]
                                 ; equ byte [ebp+105]
                                 ; equ byte [ebp+106]
                .submit_flag       equ byte [ebp+107]
                .zero_Nx_flag      equ byte [ebp+108]
                .ccounter_inc      equ byte [ebp+109]
                .shadow_flag       equ byte [ebp+110]
                .ffd_flag          equ byte [ebp+111]
                .rph_bump_flag     equ byte [ebp+112]
                                 ; equ byte [ebp+113]
                .valencEd_flag     equ byte [ebp+114]
                .from_tex_flag     equ byte [ebp+115]
                .opt_mesh_flag     equ byte [ebp+116]
                                 ; equ byte [ebp+117]
                .morph_flag        equ byte [ebp+118]

          ;      cmp       [sketch_buff_ptr],0
          ;      je        @f
          ;      cmp       .NextMed_flag,5
          ;      jne       @f
          ;   @@:
                cld
                lea       esi,[points_count_var]
                lea       edi,.points_count_var
                mov       ecx,14
                rep       movsd
                mov       esi,blur_flag
                lea       edi,.blur_flag
                mov       ecx,33 ;66
                rep       movsw
             @@:
                mov       eax,[xres_vard]
                mov       .xres_vard,eax
                mov       esi,eax
                cwde
                shr       esi,16
                mov       .x_max,eax
                mov       .y_max,esi
                mov       .width,eax
                xor       eax,eax
                mov       .x_min,eax
                mov       .y_min,eax
                movzx     eax,.xres_var
                mov       .width,eax
                push      .screen_ptr
                pop       .scr
     
                push      .Zbuffer_ptr
                pop       .zbuff
                push      [inner_vert_ptr]
                pop       .inner_vert_ptr
                mov       .matrix_scaled,matrix_scaled
                movzx     eax,.draw_flag
                add       eax,render_opts
                mov       eax,[eax]
                mov       .rend_opts,ax
                cmp       .bezier_flag,0
                jne        .no_blur
                cmp       .blur_flag,1
                jne       .bl
                xor       eax,eax
                cmp       .draw_flag, byte 21
                jne       @f
                or        eax,-1          ; clear draw buffer
             @@:
                mov       edi,.scr
                movzx     ebx,.xres_var
                movzx     ecx,.yres_var
                imul      ecx,ebx
                cld
                rep       stosd
                jmp       .no_blur
              .bl:
                mov       ecx,1
                call      blur_screen
             .no_blur:
                xor       ecx,ecx
                cmp       .points_count_var,ecx
                jz        .sketch
                mov       eax,60000.1
                mov       edi,.zbuff
                movzx     ecx,.xres_var
                movzx     ebx,.yres_var
                imul      ecx,ebx
                cmp       .draw_flag,byte 19
                jne       @f
                shl       ecx,2  ; slices need more memory
             @@:
                cld
                rep       stosd
                cmp       .draw_flag,byte 15
                ; all re  al phong models
                jl        .rotate
                mov       eax,60000.1
                mov       edi,.sl_ptrs_b_ptr
                ; i use   temporaly this buff as stencil buff
                movzx     ecx,.xres_var
                movzx     ebx,.yres_var
                imul      ecx,ebx
                cld
                rep       stosd
             .rotate:
                ; mov        esi,fog_matrices
                ; call      animate_fog
                ; mov       edi,agregated_fog
                ; mov       esi,fog_matrices
                ; mov       edx,fog_tex
                ; call      agregate_fog
                mov       esi,main_rotary
                mov       eax,[esi+8]
                mov       ebx,[esi]
                mov       edx,[esi+4]
                mov       edi,matrix
                mov       esi,one_deg
                call      make_matrixx
                mov       esi,matrix
                mov       edi,.matrix_scaled
                mov       ecx,36/4
                cld
                rep       movsd
                mov       esi,.matrix_scaled  ;esi - pointer to 3x3 matrix
                mov       ebx,scale           ;ebx - ptr to scale
                call      add_scale_to_matrix
                mov       ebx,.matrix_scaled
                call      rotate_normals
                call      rotate_points
             if 0
                mov       ebx,.matrix_scaled
                pop_abi_regs
                invoke    CreateThread,NULL,NULL,rotate_points,\
                ebx,NORMAL_PRIORITY_CLASS, [ThreadID0]
                mov       [hthread0],eax
                invoke    SetThreadPriority,hthread0,NORMAL_PRIORITY_CLASS
                invoke    SetThreadAffinityMask,[hthread0],1
                invoke    CreateThread,NULL,NULL,rotate_normals,\
                NULL,NORMAL_PRIORITY_CLASS,[ThreadID1]
                mov       [hthread1],eax
                invoke    SetThreadPriority,hthread1,NORMAL_PRIORITY_CLASS
                invoke    SetThreadAffinityMask,[hthread1],2
                invoke    ResumeThread,[hthread0]
                invoke    ResumeThread,[hthread1]
                invoke    WaitForMultipleObjectsEx,2,hthread0,1,-1,0
                invoke    CloseHandle,[hthread0]
                invoke    CloseHandle,[hthread1]
                push_abi_regs
              end if
                cmp       .stencil_s_flag,0
                je        .n_stenc
                mov       eax, do_lights_shadow_stencil_th       ; all 4 threaded render modles
                xor       edx,edx
                mov       ecx,3
                ;in eax    - proc-th adress
                ;   edx h -  param bitewise 'and' with param passed to proc_th
                call      call_thread
             .n_stenc:
                ; make background in shd buff
                movzx     eax,.xres_var
                movzx     ecx,.yres_var
                mul       ecx
                mov       ecx,eax
                shr       ecx,2
                inc       ecx
                xor       eax,eax
                mov       edi,[shd_buffer_ptr]   ; 1 byte
                cld
                rep       stosd
                cmp       .shadow_flag,1
                jne       .no_shadow
                ; draw background image
                movzx     eax,.tex_flag
                mov       ebx,TEX_X
                cmp       al,2
                jne       @f
                movzx     ebx,.xres_var
             @@:
                cld
                mov       edi,.scr
                mov       esi,texmap
                xor       ecx,ecx
             .agg:
                push      ecx
                xor       edx,edx
             .agg2:
                push      edx
                cmp       al,2
                je        @f
                and       edx,TEX_X - 1  ;0x1ff
                and       ecx,TEX_Y - 1
             @@:
                push      ecx
                imul      ecx,ebx
                add       ecx,edx
                shl       ecx,2
                mov       esi,texmap
                cmp       .tex_flag,2
                cmove     esi,[new_tex_ptr]
                add       esi,ecx
                movsd
                pop       ecx
                pop       edx
                inc       edx
                cmp       dx,.xres_var
                jnz       .agg2
                pop       ecx
                inc       ecx
                cmp       cx,.yres_var
                jnz       .agg
                ; cmp     .dr_fl,15
                ; jge      @f
                ; movups   xmm1,.y_min
                ; cvtdq2p  xmm2,[xxadd]
                ; call     do_shadow
                ;@@:
             .no_shadow:
                ; mov     esi,fog_tex
                ; call    draw_fog
                or        ebx,-1
                mov       eax,[edit_particle_no]
                cmp       eax,ebx
                je        .no_editt
                mov       edi,10        ; z coord granularity
                cvtsi2ss  xmm7,edi
                movlps    xmm1,qword[edit_start_x]
                movsx     edi,[edit_start_z]
                xorps     xmm2,xmm2
                cvtsi2ss  xmm4,edi
                divss     xmm4,xmm7
                punpcklwd xmm1,xmm2
                movhlps   xmm0,xmm1
                psubd     xmm0,xmm1
                cvtdq2ps  xmm0,xmm0
                movlhps   xmm0,xmm4
                mov       edi,.points_rotated_ptr
                movzx     ebx,.edit_flag
                cmp       ebx,1
                jne       @f
                dec       eax
                imul      eax,12
                movups    xmm1,[eax+edi]
                addps     xmm1,xmm0
                movups    [eax+edi],xmm1
                jmp       .no_editt
             @@: ; face temporaly edition routines
                cmp       ebx,2
                jnz       @f
                imul      eax,12
                add       eax,.triangles_ptr
                mov       ebx,dword[eax+4]
                mov       ecx,dword[eax+8]
                mov       eax,dword[eax]
                imul      eax,12
                imul      ebx,12
                imul      ecx,12
                movups    xmm1,[eax+edi]
                movups    xmm2,[ebx+edi]
                movups    xmm3,[ecx+edi]
                addps     xmm1,xmm0
                addps     xmm2,xmm0
                addps     xmm3,xmm0
                movups    [eax+edi],xmm1
                movups    [ebx+edi],xmm2
                movups    [ecx+edi],xmm3
                jmp       .no_editt
             @@:
                cmp       ebx,3  ;  dword[edit_edge_no],-1
                jne       @f
                dec       eax
                shl       eax,3
                add       eax,.edges_ptr
                mov       ebx,[eax+4]
                mov       eax,[eax]
                imul      eax,12
                imul      ebx,12
                movups    xmm1,[eax+edi]
                movups    xmm2,[ebx+edi]
                addps     xmm1,xmm0
                addps     xmm2,xmm0
                movups    [eax+edi],xmm1
                movups    [ebx+edi],xmm2
             .no_editt:
             @@:                         ; translate vertices into 2d
                call      translate_points

                cmp       .bezier_flag,0
                je        .nbez
                xor       eax,eax          ; clear draw buffer
                mov       edi,.scr
                movzx     ebx,.xres_var
                movzx     ecx,.yres_var
                imul      ecx,ebx  
                cld
                rep       stosd

             .nbez:
                cmp       .to_piec_flag,1
                jne       @f
                call      to_pieces_anim
              @@:
                cmp       .morph_flag,0
                je        @f
                call      morph_anim
              @@:
                movzx     eax,.rend_opts
                bt        eax,7
                jc        .one_th
                mov       edx,0xC0000000  ; env 2 thread models
                mov       ecx,2
                bt        eax,1           ; all env based models
                jc        .ddraw
                cmp       .draw_flag,byte 5   ; plain tex
                je        .ddraw
                bt        eax,6           ; all grd/flat based models
                jc        .two_th_grd
                xor       eax,eax         ; clear  buffer
                mov       edi,.slc_cnt_ptr
                movzx     ebx,.xres_var
                movzx     ecx,.yres_var
                imul      ecx,ebx
                shr       ecx,2
                cld
                rep       stosd
                ; cmp       .dr_fl,20
                ; jne       .pee
                cmp       .draw_flag,byte 17
                je        .pee
                cmp       .draw_flag,byte 18
                je        .pee
                movups    xmm5,.y_min
                call      do_stencil       ; do stencil through peel
                cmp       .draw_flag,byte 20
                jne       .pee
                movzx     eax,.xres_var
                movzx     ebx,.yres_var
                mov       edi,.slc_cnt_ptr
                call      count_transparent_voxels
                ; mov     [transparent_voxels_count],ecx
                push      ecx
                add       ecx,200
                shl       ecx,2
                malloc    ecx
                mov       [voxels_depth_ptr],eax
                ; make baackground in slices history buff
                cld
                mov       eax,60000.1          ; clear draw buffer
                mov       edi,[voxels_depth_ptr]
                pop       ecx
                rep       stosd
                ; fill poointers buff
                mov       eax,[voxels_depth_ptr]
                mov       esi,.slc_cnt_ptr
                mov       edi,.sl_ptrs_b_ptr
                movzx     edx,.xres_var
                movzx     ecx,.yres_var
                imul      ecx,edx
                call      do_variable_slices_ptr_buff  ; file procs_b.inc
             .pee:
                mov       ecx,4      ; all 4 threaded render models
                xor       edx,edx
             .ddraw:
             
                mov       eax,peel
                ; in: eax =   proc-th adress
                ;     edx =   param bitewise 'and' with param passed to proc_th
                call      call_thread
                jmp       .skip_dr
             .two_th_grd:
                mov       eax,draw_thread
                xor       edx,edx         ; grd models
                mov       ecx,2           ; ecx = thread count
                call      call_thread
                jmp       .norm_vect
             .one_th:
                mov       ebx,0x80000000  ; marker one thread, edges, verts
                xor       eax,eax
                call      draw_thread
                jmp       .norm_vect
             .skip_dr:
                mfree     [voxels_depth_ptr]
             .norm_vect:
                xor       ecx,ecx
                mov       eax,'vert'
                mov       bl,.normals_flag
                or        bl,bl
                jz        .no_nr
                cmp       bl,1
                cmove     eax,ecx
                call      draw_triangles_normals
             .no_nr:
             @@:
                cmp       .submit_flag,1
                je        @f
                cmp       [do_submit],1
                jne       .no_submit
             @@:
                mov       esi,.matrix_scaled
                lea       edi,.mx
                call      reverse_mx_3x3
                xorps     xmm0,xmm0
                mov       esi,.points_rotated_ptr
                mov       edi,.points_r_ptr
                mov       ecx,.points_count_var
                cvtdq2ps  xmm3,[xxadd]
                movlhps   xmm3,xmm0
             @@:
                movups    xmm0,[esi]
                subps     xmm0,xmm3
                movups    [edi],xmm0
                add       esi,12
                add       edi,12
                loop      @b

                mov       esi,.points_r_ptr
                mov       edi,esi ;[edit_particle_no]
                lea       ebx,.mx
                mov       ecx,.points_count_var
                call      rotary
                or        ebx,-1
                xor       eax,eax
                mov       [edit_particle_no],ebx
                mov       edi,edit_start_x
                cld
                stosd
                stosd
                stosw
        ;        mov       dword[edit_end_x],eax
        ;        mov       dword[edit_start_x],eax
        ;        mov       dword[edit_start_z],eax
                mov       [do_submit],al
                cmp       [submit_flag],1
                jne       .no_submit
                mov       [submit_flag],al
                lea       esi,[NextMsub]
                lea       edi,[NextMxadd]
                mov       ecx,17
                rep       movsw
                mov       edi,ffd_nodes         ; zero ffd
                mov       esi,ffd_nodes_intial
                mov       ecx,16*6*3
                cld
                rep       movsd
                call      write_info
                call      NextMdraw_all
                xor       eax,eax
                call      draw_menu
             @@:
             .no_submit:
                mov       al,.ffd_flag
                cmp       al,4   ; twist
                je        .no_bezA
                or        al,al
                jnz       @f
                cmp       .bezier_flag,0
                je        .no_bezA
                cmp       .derive_flag,0
                je        .no_bezA
             @@:
                ; cmp       [ffd_flag},3
                ; je        .deformat_splin
                mov       edi,Def_Derv_copy
                mov       esi,Def_Derv
                cld
                mov       ecx,2000          ; will be enough
                rep       movsd
                ; cmp       [ffd_flag],3
                ; je        .deformat_splin
                mov       eax,ffd_nodes_rot
                mov       esi,Def_Derv_copy
                cmp       .ffd_flag,3
                cmovne    esi,eax
                mov       edi,[edit_buf_ptr]
                movzx     ecx,.xres_var
                movzx     eax,.yres_var
                imul      ecx,eax
                or        eax,-1
                cld
                rep       stosd
                cmp       [ed_bpatch.derive_on],0
                je        @f
                movzx     eax,[ed_bpatch.derive_no]
                cmp       ax,0xffff
                je        @f
                movd      xmm3,eax
                imul      eax,12
                add       esi,eax
                xorps     xmm2,xmm2
                movss     xmm0,[ed_bpatch.derives_x_start]
                movss     xmm1,[ed_bpatch.derives_x_end]
                punpcklwd xmm0,xmm2
                punpcklwd xmm1,xmm2
                psubd     xmm1,xmm0
                cvtdq2ps  xmm1,xmm1
                ; rcpss     xmm2,[scale]
                ; shufps    xmm2,xmm2,11110000b
                ; mulps     xmm2,xmm1
                ; movzx     ecx,[ffd_flag]
                ; cmp       ecx,3
                ; je        .deformat_splin
                movups    xmm0,[esi]
                addps     xmm0,xmm1
                movlps    [esi],xmm0
             @@:
                mov       ebx,ffd_rect
                movzx     ecx,.ffd_flag
                cmp       ecx,3
                je        .deformat_splin
                cmp       ecx,1
                je        ..do
                or        ecx,ecx
                je        @f
                shl       ecx,1
                jmp       ..do
             @@:
                cmp       .bezier_flag,0
                je        .no_bezA
                movzx     ecx,word[bez_patch_count]
                mov       ebx,[derv_rect_ptr]
             ..do:
             @@:
                push      ecx
                push      ebx
                mov       edi,.screen_ptr
                mov       esi,ffd_nodes_rot
                call      draw_bezier_derives  ; only for sigle patch
                pop       ebx
                add       ebx,16*2
                pop       ecx
                loop      @b

                mov       esi,ffd_nodes_rot
                mov       ecx,1  ;6
                cmp       .ffd_flag,2
                jne       @f
                mov       ecx,16*3
              @@:
                cmp       .ffd_flag,1
                jne       @f
                mov       ecx,16
              @@:
                cmp       .bezier_flag,0
                je        @f
                movzx     ecx,[bez_nodes_count] ;eax  ;[bez_patch_count]
              @@:
                ;in:
                ; esi -   ptr to derives vertices list
                ; ebx -   ptr to derives indexes (16 pro one rectangle)
                ; ecx -   number of nodes
                ; globals - screen_ptr, i12, ..
                call      draw_bezier_bars
                jmp       .no_bezA
             .deformat_splin:
                mov       esi,Def_Derv_copy
                mov       ecx,3
                call      pipe_a
             .no_bezA:
                ;*******************draw edges that intersects
                mov       .tex_ptr,0x0000ff00  ; col
                xor       edx,edx
                cmp       .valencEd_flag,1
                jne       .chck_inter
                mov       edx,.edge_s_d_ptr
             .intersect_valence:
                xor       ecx,ecx
                mov       edi,.edges_ptr
             .checkit:
                push      edx
                push      ecx
                push      edi

                mov       eax,ecx
                mov       ebx,ecx
                shr       eax,3
                and       ebx,111b
                add       eax,edx
                bt        dword[eax],ebx
                jnc       .intersect_loop
                mov       eax,[edi]
                mov       ebx,[edi+4]
                imul      eax,12
                imul      ebx,12
                add       eax,.points_rotated_ptr
                add       ebx,.points_rotated_ptr
                movlps    xmm5,[eax]
                movhps    xmm5,[ebx]
                xorps     xmm6,xmm6
                movss     xmm7,.xres_vard
                punpcklwd xmm7,xmm6
                movlhps   xmm7,xmm7
                cvtps2dq  xmm5,xmm5
                sub       esp,16
                movups    [esp],xmm5
                pop       eax ebx ecx edx
                movups    xmm5,.width
                mov       edi,plain_horizontal
                call      line_grd_tex
             .intersect_loop:
                pop       edi
                add       edi,8
                pop       ecx
                pop       edx
                inc       ecx
                cmp       ecx,.edges_count
                jnz       .checkit
             .chck_inter:
                mov       ebx,[edges_intersect_ptr]
                xor       eax,eax
                or        ebx,ebx
                jz        .no_inter
                cmp       .chunks_o_flag,9
                je        @f
           ;     cmp       edx,ebx
           ;     je        .no_inter
           ;     jmp       .intersect_valence
           ;   @@:
                cmp       .mark_coll_ed_flag,al
                je        .no_inter
              @@:
                cmp       edx,ebx
                je        .no_inter
                mov       edx,ebx
                jmp       .intersect_valence
             .no_inter:
                ; *******************write chunk marks
          ;      xor       eax,eax
          ;      cmp       [ins_tris_flag],1
          ;      je        @f
                cmp       .show_ch_flag,1
                jne       .no_chunk
          ;    @@:
          ;      cmp       [chunks_count],eax
          ;      je        .no_chunk
          ;      cmp       [chunks_ptr],eax
          ;      je        .no_chunk
                push      [rand_seed]
                or        eax,-1         ;
                mov       edi,[edit_buf_ptr]
                movzx     ebx,.xres_var
                movzx     ecx,.yres_var
                imul      ecx,ebx
                cld
                rep       stosd
                ; ecx = 0
             ;   xor       ecx,ecx
                mov       esi,.triangles_ptr
             ;   mov       eax,[inside_tris_ptr]
             .nx_chunk:
                push      ecx
                mov       eax,ecx
                add       eax,eax
                add       eax,[chunks_ptr]
                mov       cx,[eax]
                movzx     ecx,cx
                push      ecx
                add           ecx,100
                mov       edx,ecx      ; simply get bar color    
                imul      edx,1000001  ;
                ror       cx,4         ;
                rol       edx,cl       ;          
                cld
                lodsd
                xchg      eax,edi
                lodsd
                xchg      eax,ebx
                lodsd
                xchg      eax,edi
                imul      eax,12
                imul      ebx,12
                imul      edi,12
                pop       ecx
                push      esi
                mov       esi,.points_rotated_ptr
                movlps    xmm6,[eax+esi]
                movlps    xmm5,[ebx+esi]
                movlps    xmm0,[edi+esi]
                rcpps     xmm4,[const3]
                addps     xmm0,xmm5
                addps     xmm0,xmm6
                mulps     xmm0,xmm4
                mov       eax,edx
                movzx     esi,.yres_var
                movzx     edx,.xres_var
                sub       esi,4
                sub       edx,4
                ; ecx - edge index; signature
                ; xmm0 - coordinates
                ; eax - color
                ; edx  - xresm4
                ; esi  - yresm4
                call      bar_edge
             .skip:
                pop       esi
                pop       ecx
                inc       ecx
                cmp       ecx,.triangles_count_var
                jnz       .nx_chunk
                pop       [rand_seed]
             .no_chunk:
                ; *******************write inner vertices marks
                xor       eax,eax
                cmp       .inner_vert_ptr,eax
                jne       @f
                cmp       .inner_vert_flag,al
                je        .no_in_vert
              @@:
                xorps     xmm6,xmm6
                movss     xmm7,.xres_vard
                mov       eax,4
                punpcklwd xmm7,xmm6
                cvtsi2ss  xmm4,eax
                cvtdq2ps  xmm7,xmm7
                shufps    xmm4,xmm4,0
             ;   movaps    xmm4,[const4]
                subps     xmm7,xmm4
                movzx     edx,.xres_var
                shl       edx,2
                mov       esi,edx
                sub       edx,12
                mov       ecx,.points_count_var
                cld
             .nx_in_vert:
                push      ecx
                mov       edi,ecx
                mov       eax,ecx
                shr       eax,3
                and       ecx,111b
                add       eax,.inner_vert_ptr
                xor       ebx,ebx
                bt        [eax],ecx   ; if bit is set vertex is not inside
                ; attempt for avoid multiple jumps
                adc       ebx,0
                shl       ebx,6
                imul      edi,12
                add       edi,.points_rotated_ptr
                movlps    xmm0,[edi]
                movaps    xmm1,xmm0
                movaps    xmm2,xmm0
                cmpltps   xmm1,xmm4 ;[const4]
                cmpltps   xmm2,xmm7
                xorps     xmm2,xmm1
                movmskps  eax,xmm2
                and       eax,11b
                add       eax,ebx
                cmp       eax,11b
                jne       .end_in_vrtl
                cvtps2dq  xmm0,xmm0
                sub       esp,8
                movlps    [esp],xmm0
                pop       eax edi
                dec       eax
                dec       edi
                shl       eax,2
                imul      edi,esi
                add       edi,eax
                add       edi,.scr
                mov       eax,0x000fffff    ; col
                stosd
                stosd
                stosd
                add       edi,edx
                stosd
                stosd
                stosd
                add       edi,edx
                stosd
                stosd
                stosd
             .end_in_vrtl:
                pop       ecx
                loop      .nx_in_vert
             .no_in_vert:
                ; ******************triangulize area/rect
                cmp       .set_tri_area_flag,0
                jne       @f                       ;
                call      draw_triangulize_rect
             @@:
             .sketch:
              ;  cmp       .NextMed_flag,5
              ;  jne       .no_sketch
                ; draw sketch buff
                mov       edi,[screen_ptr]
                mov       esi,[sketch_buff_ptr]
                or        esi,esi
                jz        .no_sketch
                movzx     ecx,.xres_var
                movzx     ebx,.yres_var
                mov       edx,0xffff
                imul      ecx,ebx
                shr       ecx,3 + 2
                cld
              .loop_sketch:
                lodsd
                push      ecx
                ; expand bits into dwords ..
                xor       ecx,ecx
              .llsc:
                xor       ebx,ebx
                bt        eax,ecx
                cmovc     ebx,edx
                or        [edi],ebx
                add       edi,4
                inc       ecx
                cmp       ecx,32
                jne       .llsc
                pop       ecx
                loop      .loop_sketch

             .no_sketch:
                cmp       .lpipe_flag,1
                jnae      @f
                mov       esi,long_pipe_derv_rotated
                mov       ecx,[long_pipe_segs_c]
                lea       ecx,[ecx*3]
                mov       edi,.scr
                push      ecx
                movzx     eax,.xres_var
                call      draw_long_pipe_derv_lines
                mov       esi,long_pipe_derv_rotated
                pop       ecx
                movzx     eax,.xres_var
                movzx     ebx,.yres_var
                mov       edx,.scr
                mov       edi,[edit_buf_ptr]
                call      write_edit_bars_vert  ; write handlers to derives
            @@:
                call      write_info
            ;   call      NextMdraw_all
            ;    stretch_menu
            .end:
                add       esp,300
                pop       ebp
                pop       edi esi ebx
ret
;==================================================================
;==================================================================
;==================================================================
;==================END MAIN RENDERING PROC=========================
;==================================================================
;==================================================================
;==================================================================
include '2normal.inc'
re_alloc_stenc_shadows:
; in: al - flag
;     al = 0 -> free
;     al = 1 -> alloc
;     al = 2 -> realloc
; malloc / dealloc / realloc
; memory describing shadow stencil buffers
                push    ebp
                mov     ebp,esp
                sub     esp,10
                .St_size1_v   equ [ebp-4]
                .St_size2_buf equ [ebp-8]
                .mark         equ byte[ebp-9]
                cmp     [stencil_s_flag],0
                je      .end
                mov     .mark,al
                cmp     al,1
                je      .st_alloc
         ;       cmp     [stencil_s_flag],0
         ;       je      .end
                mov     eax,shd_stencil_verts_A_ptr
                mov     ecx,3
              @@:
                push    ecx
                push    eax
                mfree   [eax]
                mov     eax,[esp]
                xor     edx,edx
                mov     [eax],edx
                add     eax,4
                mfree   [eax]
                pop     eax
                xor     edx,edx
                mov     [eax+4],edx
                add     eax,44
                pop     ecx
                loop    @b
                cmp     .mark,2
                jne     .end
             .st_alloc:
                mov     eax,[points_count_var]
                add     eax,20
                imul    eax,12
                mov     .St_size1_v,eax
                mov     eax,xres_var
                movzx   ebx,word[eax]
                movzx   eax,word[eax+2]
                imul    eax,ebx
                add     eax,40
                shl     eax,2
                mov     .St_size2_buf,eax
                mov     ebx,shd_stencil_verts_A_ptr
                mov     ecx,3
             @@:
                push    ecx
                push    ebx
                malloc  .St_size1_v
                mov     ebx,[esp]
                mov     [ebx],eax
                malloc  .St_size2_buf
                pop     ebx
                add     ebx,4
                mov     [ebx],eax
                add     ebx,40     ; mx
                pop     ecx
                loop    @b
             .end:
                mov     esp,ebp
                pop     ebp
ret
;=============================================================
normalize_all_light_vectors:
                mov     edi,lights1
              @@:
                call    normalize_vector    ; 3dmath.inc
                add     edi,LIGHT_SIZE
                cmp     edi,lightsend
                jl      @b
ret
;=============================================================
;=============================================================
;=============================================================
read_hrt:
                ; partly generated 3d object
                push       ebp
                mov        ebp,esp
                sub        esp,50
                .valhr                          equ dword[ebp-4]
                .val2hr                         equ dword[ebp-8]
                .val3hr                         equ dword[ebp-12]
                .val4hr                         equ dword[ebp-16]
                .triangles_normals_ptr          equ dword[ebp-20]    ; \
                .triangles_normals_rotated_ptr  equ dword[ebp-24]    ;  \
                .points_normals_ptr             equ dword[ebp-28]    ;  |
                .pcv                            equ dword[ebp-32]    ;  | > dont change order
                .tcv                            equ dword[ebp-36]    ;  |
                .points_r_ptr                   equ dword[ebp-40]    ;  /
                .triangles_ptr                  equ dword[ebp-44]    ; /
                lea        eax,[triangles_normals_ptr]
                lea        ebx,.triangles_normals_ptr
                mov        ecx,7
             .bb2:
                mov        [ebx],eax
                sub        ebx,4
                add        eax,4
                loop       .bb2
             ..factorhr = 2
                mov        esi,long_pipe_derv_init
                mov        edi,long_pipe_derv+12
                mov        ecx,32 * ..factorhr
                lea        ecx,[ecx*3 + 8]
                or         edx,-1
                call       init_long_pipe_derives
                mov        edx,[long_pipe_vert_ptr]
                mov        eax,(2 + 3) shl 2             ; one segment
                mov        ebx,32 * 3 * ..factorhr + 8   ; segments count
                mov        ecx,long_pipe_derv
                mov        esi,1  ; bspl [lpcurve_tp]
                call       calc_long_curve
                or         ebx,-1
                call       zero_flags
                ; memory  work
                ; 50000   element as  minimal
                mov        eax,'frea'
                call       free_mem_for_tp
                mov        eax,.tcv
                mov        ebx,.pcv
                mov        edx,49000
                mov        [eax],edx  ; dword 49000
                mov        [ebx],edx  ; dword 49000
                mov         eax,'alla'
                call       alloc_mem_for_tp
                mov        esi,[long_pipe_vert_ptr]
                mov        ecx,32 * 3 * ..factorhr * (2 + 3 shl 2) ; whole pipe verts count
                ; calc long pipe
                mov        eax,8       ; rotary count per one turn
                mov        edx,0.013   ; radius
                movd       xmm0,edx
                mov        ebx,.triangles_ptr
                mov        edi,.points_r_ptr
                mov        ebx,[ebx]
                mov        edi,[edi]
                ; in:  esi  - ptr to curve vertices - middle points of pipe
                ;      ecx  - curve vertices count
                ;      eax  - rotary steps count as integer
                ;      xmm0 - radius, lowest dword float
                ;      ebx  - ptr to triangles list
                ;      edi  - ptr to p ipe vertices
                ; out: ecx  - triangles count
                ;      ebx  - vertices count
                call       do_long_pipe
                mov        eax,.tcv
                mov        edi,.pcv
                mov        [eax],ecx
                mov        [edi],ebx
                xchg       ebx,ecx
                mov        edi,.points_r_ptr
                add        eax,[edi]
                mov        edi,[edi]
                cld
             .l:
                mov        eax,[edi+8]
                mov        edx,[edi+4]
                add        edi,4
                stosd
                xchg       eax,edx
                stosd
                ; mov      [edi+4],eax
                ; mov      [edi+8],edx
                ; add      edi,12
                ; add      edi,4
                loop       .l
                push       edi  ; end
                mov        eax,.points_r_ptr
                mov        edi, dword[eax]
                mov        ecx, ebx
                call       normalize_object
                mov        .valhr,1.33
                mov        .val2hr,0.12
                mov        .val3hr,0.6
                mov        edx,.points_r_ptr
                mov        ecx,.pcv
                mov        edx,[edx]
                mov        ecx,[ecx]
                fninit
             .fix:
                fld        dword[edx]
                fld        st
                fmul       .valhr
                fstp       dword[edx]
                fld        dword[edx+4]
                fadd       .val2hr
                fstp       dword[edx+4]
                fcos
                fmul       .val3hr
                fmul       dword[edx+8]
                fstp       dword[edx+8]
                add        edx,12
                loop       .fix

                mov        edx,.triangles_ptr   ; sense 'z' coof of nr vec fix
                mov        ecx,.tcv
                mov        edx,[edx]
                mov        ecx,[ecx]
             .b:
                mov        eax,[edx]
                push       dword[edx+4]
                pop        dword[edx]
                mov        [edx+4],eax
                add        edx,12
                loop       .b
                mov        eax,.pcv
                pop        edi
                push       dword [eax]
                mov        esi,hrt_points
                mov        ecx,hrt_points_count
                push       edi   ; first param of norm_ob
                push       ecx   ; sec param
                xorps      xmm0,xmm0
             .bb:
                movlps     xmm1,[esi]
                punpcklbw  xmm1,xmm0
                punpcklwd  xmm1,xmm0
                cvtdq2ps   xmm1,xmm1
                movups     [edi],xmm1
                inc        dword [eax]
                add        edi,12
                add        esi,3
                loop       .bb
                pop        ecx
                pop        edi
                call       normalize_object
                call       init_triangles_normals
                call       init_point_normals
                xorps      xmm0,xmm0
                mov        eax,0.07
                movd       xmm0,eax
                shufps     xmm0,xmm0,11000000b
                mov        eax,.points_normals_ptr
                mov        esi,.points_r_ptr
                mov        ecx,.pcv
                mov        eax,[eax]
                mov        esi,[esi]
                mov        ecx,[ecx]
                imul       ecx,12
                add        ecx,esi
             .bb3:
                push       eax ecx
                mov        ecx,30
                mov        edx,64
                call       random
                mov        edx,eax
                imul       edx,12
                pop        ecx eax
                movups     xmm1,[eax]
                mulps      xmm1,xmm0
                movups     xmm2,[esi]
                addps      xmm2,xmm1
                movups     [esi],xmm2
                add        eax,edx
                add        esi,edx
                cmp        esi,ecx
                jb         .bb3
                mov        edi,.triangles_ptr
                mov        eax,.tcv
                mov        edi,[edi]
                mov        ebx,[eax]
                imul       ebx,12
                add        edi,ebx
                pop        edx
                mov        esi,hrt_triangles
                mov        ecx,hrt_triangles_count
                add        [eax],ecx
                cld
                lea        ecx,[ecx*3]
             .b3:
                xor        eax,eax
                lodsb
                add        eax,edx
                stosd
                loop       .b3
                mov        esp,ebp
                pop        ebp
                ret
;=============================================================
;=============================================================
alloc_mem_for_tp:
;; in eax=1 - free all mem , full alloc
;;    eax=0 - not free mem, alloc all but [triangles_ptr], and [points_r_ptr]
;;    eax = 100 - alloc only for triangles_ptr and points_r_ptr not free at all
;;    eax=2 - not free mem full alloc
                push     ebp
                mov      ebp,esp
                sub      esp,16
                .mark    equ dword[ebp-4]
                .pcv     equ dword[ebp-8]
                .tcv     equ dword[ebp-12]
                mov      .mark,eax
                mov      edx,[points_count_var]
                mov      ebx,[triangles_count_var]
                add      edx,400
                add      ebx,400
                mov      eax,.mark
                mov      .pcv,edx
                mov      .tcv,ebx
                cmp      eax,'firs'      ; first allocation
                je       .l1
                mov      eax,.mark
                cmp      eax,'alla'      ; whole allocation
                je       .l1
                cmp      eax,'tpon'      ; tpon only for tri and vert
                je       .l1
                or       eax,eax
                jz       .no_tp
                cmp      eax,2
                je       .l1
                cmp      eax,100
                je       .l1
                call     free_mem_for_tp
                ; jmp     .no_tp
             .l1:
                mov      eax,.pcv
                add      eax,40
                imul     eax,12

                malloc   eax
                mov      [points_r_ptr],eax
                mov      eax,.tcv
                add      eax,65
                imul     eax,12
                malloc   eax
                mov      [triangles_ptr],eax
                cmp      .mark,'tpon'
                je       .end
                cmp      .mark,100
                je       .end
             .no_tp:
             .n_ch:
                mov      eax,.tcv
                imul     eax,24
                malloc   eax
                mov      [edges_ptr],eax
                mov      eax,.tcv
                imul     eax,12
                push     eax
                malloc   eax
                mov      [triangles_normals_ptr],eax
                pop      eax
                malloc   eax
                mov      [triangles_normals_rotated_ptr],eax
                mov      ebx,.pcv
                imul     ebx,12
                malloc   ebx
                mov      [points_normals_ptr],eax
                ; malloc ebx
                ; mov    [shadow_vertices_ptr],eax
                malloc   ebx
                mov      [points_normals_rotated_ptr],eax
                malloc   ebx
                mov      [points_rotated_ptr],eax
                mov      ebx,.pcv
                shl      ebx,2

                malloc   eax
                mov      [tex_points_ptr],eax
                shl      ebx,1
                malloc   ebx
                mov      [tex_points_f_ptr],eax
             .end:
                mov      esp,ebp
                pop      ebp
ret
;==================================================================
free_mem_for_tp:
;    eax = 33 -> not free for tris and verts
                push    ebp
                mov     ebp,esp
                sub     esp,70
                .triangles_normals_ptr         equ dword[ebp-64]
                .triangles_normals_rotated_ptr equ dword[ebp-60]
                .points_normals_ptr            equ dword[ebp-56]
                .points_count_var              equ dword[ebp-52]
                .triangles_count_var           equ dword[ebp-48]
                .points_r_ptr                  equ dword[ebp-44]
                .triangles_ptr                 equ dword[ebp-40]
                .points_rotated_ptr            equ dword[ebp-36]
                .points_normals_rotated_ptr    equ dword[ebp-32]
                .edges_ptr                     equ dword[ebp-28]
                .edges_count                   equ dword[ebp-24]

                cld
                lea      esi,[triangles_normals_ptr]
                lea      edi,.triangles_normals_ptr
                mov      ecx,11
                rep      movsd
                cmp      eax,'frea' ; free all
                je       .freeall
                cmp      eax,'notp'
                je       .no_tp
                cmp      eax,33
                je       .no_tp
             .freeall:
                mfree    .points_r_ptr
                mfree    .triangles_ptr
             .no_tp:
                mfree    .edges_ptr
                mfree    .triangles_normals_ptr
                mfree    .triangles_normals_rotated_ptr
                mfree    .points_normals_ptr
                mfree    .points_normals_rotated_ptr
                mfree    .points_rotated_ptr
                mfree    [tex_points_ptr]
                mfree    [tex_points_f_ptr]
                mfree    [chunks_ptr]
             .end:
                mov      esp,ebp
                pop      ebp
                ret
if 0
;=======================================================
pack_triangles_normals:
; two cooeficients x and y each 11 bits hst bit -> sign
; hst one bit -> z sign
; Whole normal is 24 bits, 3 bytes in size. One hgst bit free
                push     ebp
                mov      ebp,esp
                sub      esp,120
                .pts_ptr equ dword[ebp-4]
                .vec_n   equ dword[ebp-60]
                push     [points_r_ptr]
                pop      .pts_ptr
                mov      ecx,[triangles_count_var]
                mov      esi,[triangles_normals_ptr]
                mov      edi,[triangles_normals_packed]
                mov      eax,1000
                cvtsi2ss xmm2,eax
                shufps   xmm2,xmm2,0
                cmpeqd   xmm4,xmm4
                pslld    xmm4,xmm4,31
              .ll2:
                movups   xmm0,[esi]
                movaps   xmm5,xmm0
                mulps    xmm0,xmm2
                andps    xmm5,xmm4  ; xm5 = sign pack
                cvtps2dq xmm1,xmm0
                packusdw xmm1,xmm1
                push     ecx
                xor      ecx,ecx
                mov      [edi],ecx
              .ll:
                movd     eax,xmm1
                and      eax,0x3ff ; 10 bits
                psrldq   xmm1,2
                shl      eax,cl
                or       [edi],eax
                inc      edi
                add      ecx,4
                cmp      ecx,8
                jnz      @b
                pop      ecx
                inc      edi
                add      esi,12
                loop     .ll2

end if
;===================================================================
;====================================================================
init_triangles_normals:  ; FPU ver orginally by Mikolaj Feliks
                         ; SSE changes by macgub
                prompt   prompt_norm
                push     ebp
                mov      ebp,esp
                sub      esp,120
                .pts_ptr equ dword[ebp-4]
                .vec_n   equ dword[ebp-60]
                push     [points_r_ptr]
                pop      .pts_ptr
                mov      ecx,[triangles_count_var]
                mov      edi,[triangles_normals_ptr]
                mov      esi,[triangles_ptr]
             .b:
                cld
                lodsd
                xchg     eax,ebx
                lodsd
                xchg     edx,eax
                lodsd
                push     esi
                mov      esi,.pts_ptr
                imul     eax,12
                imul     ebx,12
                imul     edx,12
                push     edi
                movups   xmm0,[eax+esi]
                movups   xmm1,[ebx+esi]
                movups   xmm2,[edx+esi]
                subps    xmm0,xmm1
                subps    xmm1,xmm2
                call     cross_reg
            ;    lea      edi,.vec_n
            ;    movups   [edi],xmm0
            ;    call     normalize_vector
            ;    movups   xmm0,[edi]
                movaps   xmm1,xmm0
                dpps     xmm0,xmm0,01110111b
                rsqrtps  xmm0,xmm0
                mulps    xmm0,xmm1
                pop      edi
                pop      esi
                movups   [edi],xmm0
                add      edi,12
                dec      ecx
                jnz      .b
           ;     loop     .b
                cls
                mov      esp,ebp
                pop      ebp
ret
;=====================================================================
init_point_normals:
                .end_ptr equ dword[ebp-4]
                .t_ptr   equ dword[ebp-8]
                .ptr2    equ dword[ebp-12]
                push     ebp
                mov      ebp,esp
                sub      esp,30
                mov      ebx,[triangles_count_var]
                push     ebx
                imul     ebx,24
                mov      .end_ptr,ebx
                add      ebx,1000
                malloc   ebx
                mov      .t_ptr,eax
                add      .end_ptr,eax
                malloc   ebx
                mov      .ptr2,eax
                mov      edi,.t_ptr
                xor      eax,eax
                mov      esi,[triangles_ptr]
                pop      ecx
                cld
             @@:
                movsd          ; first - vert index, sec tri index
                stosd
                movsd
                stosd
                movsd
                stosd
                inc      eax
                loop     @b
                mov      esi,.t_ptr
                mov      edi,.ptr2
                mov      ecx,[triangles_count_var]
                lea      ecx,[ecx*3]
                call     sort_hybrid
                mov      esi,.t_ptr
                mov      edi,[points_normals_ptr]
             .ag1:
                cmp      esi,.end_ptr
                jae      .enn
                lodsd
                mov      edx,eax
                pxor     xmm1,xmm1
             @@:
                cmp      esi,.end_ptr
                jae      .enn
                lodsd
                imul     eax,12
                add      eax,[triangles_normals_ptr]
                movups   xmm0,[eax]
                addps    xmm1,xmm0        ;         vv
                lodsd
                cmp      eax,edx
                je       @b
                sub      esi,4
                movups   [edi],xmm1
                call     normalize_vector
                add      edi,12
                jmp      .ag1
             .enn:
                mfree    .t_ptr
                mfree    .ptr2
                mov      esp,ebp
                pop      ebp
                ret
;================================================
draw_menu:
;    in:
;       eax != 0 - skip clear menu screen buffer
                or      eax,eax
                jnz     .ffa
                mov     edi,menu_screen
                mov     ecx,MXRES*MYRES   ;+ IN_CNST)
                xor     eax,eax
                cld
                rep     stosd
             .ffa:
                mov     esi,menu_data
             .lab:
                cld
                push    esi
                inc     esi
                lodsd
                push    eax
                xchg    eax,ebx
                lodsd
                xchg    eax,ebx
                ror     eax,16
                xchg    ax,bx
                ror     eax,16
                push    esi

                mov     esi,MYRES
                mov     edx,MXRES
                push    esi
                push    edx
                movlps  xmm7,[esp]
                push    dword menu_screen
                push    dword 0x0000ff00
                push    edx
                movups  xmm4,[esp]
                add     esp,20
                xorps   xmm6,xmm6
                call    write_frame
                pop     esi
                pop     eax
                ; write button desc
                ror     eax,16
                add     eax,0x00030000
                mov     ebx,font_table + 62 ; / font_tab2 + 62
                mov     ecx,MXRES shl 16 + MYRES

                mov     edi,menu_screen
                mov     edx,16 ; / 9
                call    write_text
                add     eax,90 shl 16 + 0   ; write flag of button

                movzx   edi,word[esi+14]    ; butt no ?
                add     edi,menu_flags-1
                cmp     [edi],byte -1
                ; flag = -1 ?
                je      @f
          ;     cmp     word[esi+12],bx
          ;     je      @f
                movzx   edi,byte[edi] ;byte[esi+12]
                imul    edi,5
                mov     ebx,font_table + 62  ; / font_tab2 + 62
                movzx   esi,word[esi+12]
                add     esi,edi
                add     esi,flags_caption
                mov     edi,menu_screen
                mov     edx,16 ; / 9
                call    write_text
             @@:
                pop     esi
                add     esi,menu_data.op
                or      ebx,-1
                cmp     word[esi],bx
                jne     .lab
                ret
;===================================================================
;=================================================================
random_point_light_position:
                xor      ebx,ebx
             @@:
                push     ebx
                mov      ecx, -1000
                mov      edx, 1000
                call     random
                cvtsi2ss xmm0,eax
                pop      ebx
                movss    [point_light_coords+ebx],xmm0
                add      ebx,4
                cmp      ebx,8
                jnz      @b
                xor      ebx,ebx
                mov      eax,255
                cvtsi2ss xmm1,eax
             @@:
                push     ebx
                xor      ecx,ecx
                mov      edx,255
                call     random
                cvtsi2ss xmm0,eax
                divss    xmm0,xmm1
                pop      ebx
                movss    [plane_equation+ebx],xmm0
                add      ebx,4
                cmp      ebx,8
                jnz      @b
                ret
;====================================================
malloc_proc:
                pop_abi_regs
                invoke   VirtualAlloc,NULL,eax,MEM_COMMIT+MEM_RESERVE,PAGE_EXECUTE_READWRITE
                or       eax,eax
                jnz      .malloc_flag
                call     mem_error
                ; invoke   MessageBox,NULL,_error_mem,NULL,MB_ICONERROR+MB_OK
             .malloc_flag:
                malloc_proc_end:
                push_abi_regs
                ret
mfree_proc:
                pop_abi_regs
                invoke   VirtualFree ,eax ,0 , MEM_RELEASE
                push_abi_regs
ret
;====================================================
mem_error:
                invoke   MessageBox,NULL,_error_mem,NULL,MB_ICONERROR+MB_OK
                invoke   Sleep,500
                ; invoke  ReleaseDC,[hwnd],[hdc]
                ; invoke  DestroyWindow,[hwnd]
                ; invoke  ExitProcess,[msg.wParam]
                ret
;=====================================================
;cls_proc:
;                pushad
;                pop_abi_regs
;                movzx     eax,[xres_var]   ; clear text with render metod
;                invoke    StretchDIBits,[hdc],0,0,eax,30,0,0,1,1,fakebitmap,Wbmi,0,SRCCOPY
;                push_abi_regs
;                popad
;ret

                ; include 'fog.inc'
                include  'tesselate.inc'
                include  '3dmath.inc'
                include  'procs_a.inc'
                include  'procs_b.inc'
                include  'procs_c.inc'
                include  'sort.inc'
                include  'chunks.inc'
                include  'arbitrary_vector.inc'
                include  'asc.inc'
                include  'menu.inc'
                include  'edit.inc'
                include  'bezier_surface.inc'
                include  'bez3.inc'
                include  'long_pipe.inc'
                include  'normals.inc'
                include  'file.inc'
                include  'formats.inc'
                include  'write_files.inc'
              ;  include  'cos_tr.inc'
              ;  include  'jpeg_code.inc'
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                section      '.data' data readable writeable
                _class       TCHAR      'Win3ds',0
                _title       TCHAR      'Ave Cruce Salus Mea',0
                _error       TCHAR      'Startup failed.',0
                _error_mem   TCHAR      'Memory fail.',0
                ; include     'jpeg_init.inc'
                ; include     'cudadata.inc'
              ;  fakebitmap:  dd 10 dup  10
                include      'data.inc'

                ; include     'cudadatauninit.inc'
              ;  include      'jpeg_uninit.inc'
                section      '.idata' import data readable writeable
                library      kernel32,'KERNEL32',user32,'USER32',gdi32,'GDI32',winmm,'WINMM' , comdlg32,'COMDLG32'; ,  cuda,'NVCUDA.DLL'        ;   timing,'timing.dll'
                include      '.\include\api\kernel32.inc'
                include      '.\include\api\user32.inc'
                include      '.\include\api\gdi32.inc'
                include      '.\include\api\comdlg32.inc'
                ; include     'api_cuda.inc'
